%%% -*-BibTeX-*- %%% ==================================================================== %%% BibTeX-file{ %%% author = "Nelson H. F. Beebe", %%% version = "1.101", %%% date = "20 January 2026", %%% time = "07:17:27 MDT", %%% filename = "tecs.bib", %%% address = "University of Utah %%% Department of Mathematics, 110 LCB %%% 155 S 1400 E RM 233 %%% Salt Lake City, UT 84112-0090 %%% USA", %%% telephone = "+1 801 581 5254", %%% URL = "https://www.math.utah.edu/~beebe", %%% checksum = "34051 76278 399259 3791711", %%% email = "beebe at math.utah.edu, beebe at acm.org, %%% beebe at computer.org (Internet)", %%% codetable = "ISO/ASCII", %%% keywords = "bibliography; BibTeX; ACM Transactions on %%% Embedded Computing Systems (TECS)", %%% license = "public domain", %%% supported = "no", %%% docstring = "This is a COMPLETE BibTeX bibliography for %%% the journal ACM Transactions on Embedded %%% Computing Systems (no CODEN, ISSN 1539-9087 %%% (print), 1558-3465 (electronic)), for %%% 2002--date. %%% %%% Publication began with volume 1, number 1, %%% in November 2002. The journal appears %%% quarterly, in February, May, August, and %%% November. %%% %%% The journal has World-Wide Web sites at: %%% %%% http://www.acm.org/pubs/tecs %%% https://dl.acm.org/loi/tecs %%% %%% Tables-of-contents of all issues are %%% available at: %%% %%% http://www.acm.org/pubs/contents/journals/tecs/ %%% %%% Qualified subscribers can retrieve the full %%% text of recent articles in PDF form. 
%%% %%% At version 1.101, the COMPLETE journal %%% coverage looked like this: %%% %%% 2002 ( 7) 2011 ( 19) 2020 ( 64) %%% 2003 ( 24) 2012 ( 89) 2021 ( 59) %%% 2004 ( 36) 2013 ( 152) 2022 ( 86) %%% 2005 ( 39) 2014 ( 97) 2023 ( 165) %%% 2006 ( 30) 2015 ( 87) 2024 ( 104) %%% 2007 ( 39) 2016 ( 109) 2025 ( 173) %%% 2008 ( 47) 2017 ( 163) 2026 ( 17) %%% 2009 ( 39) 2018 ( 90) %%% 2010 ( 58) 2019 ( 122) %%% %%% Article: 1915 %%% %%% Total entries: 1915 %%% %%% Spelling has been verified with the UNIX %%% spell and GNU ispell programs using the %%% exception dictionary stored in the %%% companion file with extension .sok. %%% %%% BibTeX citation tags are uniformly chosen %%% as name:year:abbrev, where name is the %%% family name of the first author or editor, %%% year is a 4-digit number, and abbrev is a %%% 3-letter condensation of important title %%% words. Citation tags were automatically %%% generated by software developed for the %%% BibNet Project. %%% %%% In this bibliography, entries are sorted in %%% publication order, using ``bibsort -byvolume.'' %%% %%% The checksum field above contains a CRC-16 %%% checksum as the first value, followed by the %%% equivalent of the standard UNIX wc (word %%% count) utility output of lines, words, and %%% characters. This is produced by Robert %%% Solovay's checksum utility.", %%% } %%% ==================================================================== @Preamble{"\input bibnames.sty" # "\ifx \undefined \pkg \def \pkg #1{{{\tt #1}}} \fi" } %%% ==================================================================== %%% Acknowledgement abbreviations: @String{ack-nhfb = "Nelson H. F. 
Beebe, University of Utah, Department of Mathematics, 110 LCB, 155 S 1400 E RM 233, Salt Lake City, UT 84112-0090, USA, Tel: +1 801 581 5254, e-mail: \path|beebe@math.utah.edu|, \path|beebe@acm.org|, \path|beebe@computer.org| (Internet), URL: \path|https://www.math.utah.edu/~beebe/|"} %%% ==================================================================== %%% Journal abbreviations: @String{j-TECS = "ACM Transactions on Embedded Computing Systems"} %%% ==================================================================== %%% Bibliography entries: @Article{Wolf:2002:III, author = "Wayne Wolf", title = "Introduction to the inaugural issue", journal = j-TECS, volume = "1", number = "1", pages = "1--1", month = nov, year = "2002", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 7 11:26:40 MDT 2003", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Jacob:2002:ITS, author = "Bruce Jacob and Shuvra Bhattacharyya", title = "Introduction to the two special issues on memory", journal = j-TECS, volume = "1", number = "1", pages = "2--5", month = nov, year = "2002", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 7 11:26:40 MDT 2003", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Avissar:2002:OMA, author = "Oren Avissar and Rajeev Barua and Dave Stewart", title = "An optimal memory allocation scheme for scratch-pad-based embedded systems", journal = j-TECS, volume = "1", number = "1", pages = "6--26", month = nov, year = "2002", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 7 11:26:40 MDT 2003", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chen:2002:TGC, author = "G. Chen and R. Shetty and M. Kandemir and N. Vijaykrishnan and M. J. Irwin and M. Wolczko", title = "Tuning garbage collection for reducing memory system energy in an embedded {Java} environment", journal = j-TECS, volume = "1", number = "1", pages = "27--55", month = nov, year = "2002", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 7 11:26:40 MDT 2003", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lee:2002:AAI, author = "Jung-Hoon Lee and Shin-Dug Kim and Charles Weems", title = "Application-adaptive intelligent cache memory system", journal = j-TECS, volume = "1", number = "1", pages = "56--78", month = nov, year = "2002", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 7 11:26:40 MDT 2003", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Yang:2002:FVL, author = "Jun Yang and Rajiv Gupta", title = "Frequent value locality and its applications", journal = j-TECS, volume = "1", number = "1", pages = "79--105", month = nov, year = "2002", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 7 11:26:40 MDT 2003", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ykman-Couvreur:2002:SLE, author = "Ch. Ykman-Couvreur and J. Lambrecht and A. {Van Der Togt} and F. Catthoor and H. {De Man}", title = "System-level exploration of association table implementations in telecom network applications", journal = j-TECS, volume = "1", number = "1", pages = "106--140", month = nov, year = "2002", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 7 11:26:40 MDT 2003", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Jacob:2003:ITS, author = "Bruce Jacob and Shuvra Bhattacharyya", title = "Introduction to the two special issues on memory", journal = j-TECS, volume = "2", number = "1", pages = "1--4", month = feb, year = "2003", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 7 11:26:41 MDT 2003", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Benini:2003:EAD, author = "Luca Benini and Alberto Macii and Massimo Poncino", title = "Energy-aware design of embedded memories: a survey of technologies, architectures, and optimization techniques", journal = j-TECS, volume = "2", number = "1", pages = "5--32", month = feb, year = "2003", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 7 11:26:41 MDT 2003", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Grun:2003:APB, author = "Peter Grun and Nikil Dutt and Alex Nicolau", title = "Access pattern-based memory and connectivity architecture exploration", journal = j-TECS, volume = "2", number = "1", pages = "33--73", month = feb, year = "2003", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 7 11:26:41 MDT 2003", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Qu:2003:SSS, author = "Gang Qu and Miodrag Potkonjak", title = "System synthesis of synchronous multimedia applications", journal = j-TECS, volume = "2", number = "1", pages = "74--97", month = feb, year = "2003", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 7 11:26:41 MDT 2003", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Shim:2003:LEC, author = "Hojun Shim and Yongsoo Joo and Yongseok Choi and Hyung Gyu Lee and Naehyuck Chang", title = "Low-energy off-chip {SDRAM} memory systems for embedded applications", journal = j-TECS, volume = "2", number = "1", pages = "98--130", month = feb, year = "2003", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 7 11:26:41 MDT 2003", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Guang:2003:SIC, author = "Gao Guang and Trevor Mudge", title = "Special issue on compilers, architecture, and synthesis for embedded systems", journal = j-TECS, volume = "2", number = "2", pages = "131--131", month = may, year = "2003", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 7 11:26:41 MDT 2003", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Franke:2003:ARH, author = "Bj{\"o}rn Franke and Michael O'Boyle", title = "Array recovery and high-level transformations for {DSP} applications", journal = j-TECS, volume = "2", number = "2", pages = "132--162", month = may, year = "2003", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 7 11:26:41 MDT 2003", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kim:2003:PIC, author = "Soontae Kim and N. Vijaykrishnan and Mahmut Kandemir and Anand Sivasubramaniam and Mary Jane Irwin", title = "Partitioned instruction cache architecture for energy efficiency", journal = j-TECS, volume = "2", number = "2", pages = "163--185", month = may, year = "2003", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 7 11:26:41 MDT 2003", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Rabbah:2003:DRD, author = "Rodric M. Rabbah and Krishna V. Palem", title = "Data remapping for design space optimization of embedded memory systems", journal = j-TECS, volume = "2", number = "2", pages = "186--218", month = may, year = "2003", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 7 11:26:41 MDT 2003", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. 
Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhao:2003:SRM, author = "Qin Zhao and Bart Mesman and Twan Basten", title = "Static resource models for code-size efficient embedded processors", journal = j-TECS, volume = "2", number = "2", pages = "219--250", month = may, year = "2003", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 7 11:26:41 MDT 2003", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Jacome:2003:SIP, author = "Margarida Jacome and Francky Catthoor", title = "Special issue on power-aware embedded computing", journal = j-TECS, volume = "2", number = "3", pages = "251--254", month = aug, year = "2003", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 7 11:26:42 MDT 2003", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Martin:2003:CSS, author = "Thomas L. Martin and Daniel P. Siewiorek and Asim Smailagic and Matthew Bosworth and Matthew Ettus and Jolin Warren", title = "A case study of a system-level approach to power-aware computing", journal = j-TECS, volume = "2", number = "3", pages = "255--276", month = aug, year = "2003", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 7 11:26:42 MDT 2003", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. 
Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Rakhmatov:2003:EMB, author = "Daler Rakhmatov and Sarma Vrudhula", title = "Energy management for battery-powered embedded systems", journal = j-TECS, volume = "2", number = "3", pages = "277--324", month = aug, year = "2003", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 7 11:26:42 MDT 2003", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Irani:2003:OSD, author = "Sandy Irani and Sandeep Shukla and Rajesh Gupta", title = "Online strategies for dynamic power management in systems with multiple power-saving states", journal = j-TECS, volume = "2", number = "3", pages = "325--346", month = aug, year = "2003", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 7 11:26:42 MDT 2003", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhou:2003:AMC, author = "Huiyang Zhou and Mark C. Toburen and Eric Rotenberg and Thomas M. Conte", title = "Adaptive mode control: a static-power-efficient cache design", journal = j-TECS, volume = "2", number = "3", pages = "347--372", month = aug, year = "2003", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 7 11:26:42 MDT 2003", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Unsal:2003:CCC, author = "Osman S. Unsal and Raksit Ashok and Israel Koren and C. Mani Krishna and Csaba Andras Moritz", title = "{Cool-Cache}: a compiler-enabled energy efficient data caching framework for embedded\slash multimedia processors", journal = j-TECS, volume = "2", number = "3", pages = "373--392", month = aug, year = "2003", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 7 11:26:42 MDT 2003", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Yun:2003:EOV, author = "Han-Saem Yun and Jihong Kim", title = "On energy-optimal voltage scheduling for fixed-priority hard real-time systems", journal = j-TECS, volume = "2", number = "3", pages = "393--430", month = aug, year = "2003", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 7 11:26:42 MDT 2003", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Schurgers:2003:PME, author = "Curt Schurgers and Vijay Raghunathan and Mani B. 
Srivastava", title = "Power management for energy-aware communication systems", journal = j-TECS, volume = "2", number = "3", pages = "431--447", month = aug, year = "2003", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 7 11:26:42 MDT 2003", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Gordon-Ross:2003:TIC, author = "Ann Gordon-Ross and Susan Cotterell and Frank Vahid", title = "Tiny instruction caches for low power embedded systems", journal = j-TECS, volume = "2", number = "4", pages = "449--481", month = nov, year = "2003", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Dec 22 17:52:29 MST 2003", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lin:2003:CMC, author = "Kelvin Lin and Chung-Ping Chung and Jean Jyh-Jiun Shann", title = "Compressing {MIPS} code by multiple operand dependencies", journal = j-TECS, volume = "2", number = "4", pages = "482--508", month = nov, year = "2003", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Dec 22 17:52:29 MST 2003", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Musoll:2003:SRU, author = "Enric Musoll", title = "Speculating to reduce unnecessary power consumption", journal = j-TECS, volume = "2", number = "4", pages = "509--536", month = nov, year = "2003", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Dec 22 17:52:29 MST 2003", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Rusu:2003:MRR, author = "Cosmin Rusu and Rami Melhem and Daniel Moss{\'e}", title = "Maximizing rewards for real-time applications with energy constraints", journal = j-TECS, volume = "2", number = "4", pages = "537--559", month = nov, year = "2003", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Dec 22 17:52:29 MST 2003", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Venkataramani:2003:ACC, author = "Girish Venkataramani and Walid Najjar and Fadi Kurdahi and Nader Bagherzadeh and Wim Bohm and Jeff Hammes", title = "Automatic compilation to a coarse-grained reconfigurable system-on-a-chip", journal = j-TECS, volume = "2", number = "4", pages = "560--589", month = nov, year = "2003", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Dec 22 17:52:29 MST 2003", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhuge:2003:CSR, author = "Qingfeng Zhuge and Bin Xiao and Edwin H.-M. Sha", title = "Code size reduction technique and implementation for software-pipelined {DSP} applications", journal = j-TECS, volume = "2", number = "4", pages = "590--613", month = nov, year = "2003", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Dec 22 17:52:29 MST 2003", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Gupta:2004:GES, author = "Rajesh Gupta", title = "Guest editorial: {Special} issue on networked embedded systems", journal = j-TECS, volume = "3", number = "1", pages = "1--2", month = feb, year = "2004", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 6 07:14:21 MST 2004", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Raghunathan:2004:EEW, author = "Vijay Raghunathan and Saurabh Ganeriwal and Mani Srivastava and Curt Schurgers", title = "Energy efficient wireless packet scheduling and fair queuing", journal = j-TECS, volume = "3", number = "1", pages = "3--23", month = feb, year = "2004", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 6 07:14:21 MST 2004", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Bulusu:2004:SCL, author = "Nirupama Bulusu and John Heidemann and Deborah Estrin and Tommy Tran", title = "Self-configuring localization systems: Design and Experimental Evaluation", journal = j-TECS, volume = "3", number = "1", pages = "24--60", month = feb, year = "2004", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 6 07:14:21 MST 2004", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zou:2004:SDT, author = "Yi Zou and Krishnendu Chakrabarty", title = "Sensor deployment and target localization in distributed sensor networks", journal = j-TECS, volume = "3", number = "1", pages = "61--91", month = feb, year = "2004", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 6 07:14:21 MST 2004", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Gebotys:2004:DSC, author = "Catherine H. Gebotys", title = "Design of secure cryptography against the threat of power-attacks in {DSP}-embedded processors", journal = j-TECS, volume = "3", number = "1", pages = "92--113", month = feb, year = "2004", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 6 07:14:21 MST 2004", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mishra:2004:MVP, author = "Prabhat Mishra and Nikil Dutt", title = "Modeling and validation of pipeline specifications", journal = j-TECS, volume = "3", number = "1", pages = "114--139", month = feb, year = "2004", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 6 07:14:21 MST 2004", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mishra:2004:PMC, author = "Prabhat Mishra and Mahesh Mamidipaka and Nikil Dutt", title = "Processor-memory coexploration using an architecture description language", journal = j-TECS, volume = "3", number = "1", pages = "140--162", month = feb, year = "2004", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 6 07:14:21 MST 2004", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Naik:2004:CCS, author = "Mayur Naik and Jens Palsberg", title = "Compiling with code-size constraints", journal = j-TECS, volume = "3", number = "1", pages = "163--181", month = feb, year = "2004", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 6 07:14:21 MST 2004", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Schmitz:2004:ISO, author = "Marcus T. Schmitz and Bashir M. Al-Hashimi and Petru Eles", title = "Iterative schedule optimization for voltage scalable distributed embedded systems", journal = j-TECS, volume = "3", number = "1", pages = "182--217", month = feb, year = "2004", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 6 07:14:21 MST 2004", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Stitt:2004:ESS, author = "Greg Stitt and Frank Vahid and Shawn Nematbakhsh", title = "Energy savings and speedups from partitioning critical software loops to hardware in embedded systems", journal = j-TECS, volume = "3", number = "1", pages = "218--232", month = feb, year = "2004", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 6 07:14:21 MST 2004", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lach:2004:ESI, author = "John Lach and Kia Bazargan", title = "Editorial: {Special} issue on dynamically adaptable embedded systems", journal = j-TECS, volume = "3", number = "2", pages = "233--236", month = may, year = "2004", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Oct 29 06:35:47 MDT 2004", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ghiasi:2004:OAM, author = "Soheil Ghiasi and Ani Nahapetian and Majid Sarrafzadeh", title = "An optimal algorithm for minimizing run-time reconfiguration delay", journal = j-TECS, volume = "3", number = "2", pages = "237--256", month = may, year = "2004", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Oct 29 06:35:47 MDT 2004", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Robertson:2004:DFP, author = "Ian Robertson and James Irvine", title = "A design flow for partially reconfigurable hardware", journal = j-TECS, volume = "3", number = "2", pages = "257--283", month = may, year = "2004", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Oct 29 06:35:47 MDT 2004", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mejia-Alvarez:2004:ASS, author = "Pedro Mejia-Alvarez and Eugene Levner and Daniel Moss{\'e}", title = "Adaptive scheduling server for power-aware real-time tasks", journal = j-TECS, volume = "3", number = "2", pages = "284--306", month = may, year = "2004", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Oct 29 06:35:47 MDT 2004", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zhang:2004:BAP,
  author =       "Fan Zhang and Samuel T. Chanson",
  title =        "Blocking-aware processor voltage scheduling for real-time tasks",
  journal =      j-TECS,
  volume =       "3",
  number =       "2",
  pages =        "307--335",
  month =        may,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Oct 29 06:35:47 MDT 2004",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zhang:2004:DAF,
  author =       "Ying Zhang and Krishnendu Chakrabarty",
  title =        "Dynamic adaptation for fault tolerance and power management in embedded real-time systems",
  journal =      j-TECS,
  volume =       "3",
  number =       "2",
  pages =        "336--360",
  month =        may,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Oct 29 06:35:47 MDT 2004",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Huang:2004:DDR,
  author =       "Zhining Huang and Sharad Malik and Nahri Moreano and Guido Araujo",
  title =        "The design of dynamically reconfigurable datapath coprocessors",
  journal =      j-TECS,
  volume =       "3",
  number =       "2",
  pages =        "361--384",
  month =        may,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Oct 29 06:35:47 MDT 2004",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Noguera:2004:MRA,
  author =       "Juanjo Noguera and Rosa M. Badia",
  title =        "Multitasking on reconfigurable architectures: microarchitecture support and dynamic scheduling",
  journal =      j-TECS,
  volume =       "3",
  number =       "2",
  pages =        "385--406",
  month =        may,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Oct 29 06:35:47 MDT 2004",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zhang:2004:STC,
  author =       "Chuanjun Zhang and Frank Vahid and Roman Lysecky",
  title =        "A self-tuning cache architecture for embedded systems",
  journal =      j-TECS,
  volume =       "3",
  number =       "2",
  pages =        "407--425",
  month =        may,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Oct 29 06:35:47 MDT 2004",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{He:2004:AAA,
  author =       "Tian He and Brian M. Blum and John A. Stankovic and Tarek Abdelzaher",
  title =        "{AIDA}: {Adaptive} application-independent data aggregation in wireless sensor networks",
  journal =      j-TECS,
  volume =       "3",
  number =       "2",
  pages =        "426--457",
  month =        may,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Oct 29 06:35:47 MDT 2004",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans.
Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Serpanos:2004:GES,
  author =       "Dimitrios N. Serpanos and Haris Lekatsas",
  title =        "Guest editorial: {Special} issue on embedded systems and security",
  journal =      j-TECS,
  volume =       "3",
  number =       "3",
  pages =        "459--460",
  month =        aug,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Oct 29 06:35:47 MDT 2004",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ravi:2004:SES,
  author =       "Srivaths Ravi and Anand Raghunathan and Paul Kocher and Sunil Hattangady",
  title =        "Security in embedded systems: {Design} challenges",
  journal =      j-TECS,
  volume =       "3",
  number =       "3",
  pages =        "461--491",
  month =        aug,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Oct 29 06:35:47 MDT 2004",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

%%% NOTE(review): the first author's given name in the next entry has had
%%% its acute accent restored (Jean-S{\'e}bastien), using the same braced
%%% accent form as other names in this file; confirm against the published
%%% article.
@Article{Coron:2004:SSL,
  author =       "Jean-S{\'e}bastien Coron and David Naccache and Paul Kocher",
  title =        "Statistics and secret leakage",
  journal =      j-TECS,
  volume =       "3",
  number =       "3",
  pages =        "492--508",
  month =        aug,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Oct 29 06:35:47 MDT 2004",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wollinger:2004:EHC,
  author =       "Thomas Wollinger and Jan Pelzl and Volker Wittelsberger and Christof Paar and G{\"o}kay Saldamli and {\c{C}}etin K. Ko{\c{c}}",
  title =        "Elliptic and hyperelliptic curves on embedded {$ \mu $P}",
  journal =      j-TECS,
  volume =       "3",
  number =       "3",
  pages =        "509--533",
  month =        aug,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Oct 29 06:35:47 MDT 2004",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wollinger:2004:SFS,
  author =       "Thomas Wollinger and Jorge Guajardo and Christof Paar",
  title =        "Security on {FPGAs}: {State-of-the-art} implementations and attacks",
  journal =      j-TECS,
  volume =       "3",
  number =       "3",
  pages =        "534--574",
  month =        aug,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Oct 29 06:35:47 MDT 2004",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Reyhani-Masoleh:2004:EDS,
  author =       "Arash Reyhani-Masoleh and M.
Anwar Hasan",
  title =        "Efficient digit-serial normal basis multipliers over binary extension fields",
  journal =      j-TECS,
  volume =       "3",
  number =       "3",
  pages =        "575--592",
  month =        aug,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Oct 29 06:35:47 MDT 2004",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Reyhani-Masoleh:2004:TFT,
  author =       "Arash Reyhani-Masoleh and M. Anwar Hasan",
  title =        "Towards fault-tolerant cryptographic computations over finite fields",
  journal =      j-TECS,
  volume =       "3",
  number =       "3",
  pages =        "593--613",
  month =        aug,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Oct 29 06:35:47 MDT 2004",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Liu:2004:FSM,
  author =       "Rong-Tai Liu and Nen-Fu Huang and Chih-Hao Chen and Chia-Nan Kao",
  title =        "A fast string-matching algorithm for network processor-based intrusion detection system",
  journal =      j-TECS,
  volume =       "3",
  number =       "3",
  pages =        "614--633",
  month =        aug,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Oct 29 06:35:47 MDT 2004",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Park:2004:LLS,
  author =       "Taejoon Park and Kang G. Shin",
  title =        "{LiSP}: a lightweight security protocol for wireless sensor networks",
  journal =      j-TECS,
  volume =       "3",
  number =       "3",
  pages =        "634--660",
  month =        aug,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Oct 29 06:35:47 MDT 2004",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Harkin:2004:MOR,
  author =       "J. Harkin and T. M. McGinnity and L. P. Maguire",
  title =        "Modeling and optimizing run-time reconfiguration using evolutionary computation",
  journal =      j-TECS,
  volume =       "3",
  number =       "4",
  pages =        "661--685",
  month =        nov,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Oct 29 06:35:48 MDT 2004",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Im:2004:DVS,
  author =       "Chaeseok Im and Soonhoi Ha and Huiseok Kim",
  title =        "Dynamic voltage scheduling with buffers in low-power multimedia applications",
  journal =      j-TECS,
  volume =       "3",
  number =       "4",
  pages =        "686--705",
  month =        nov,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Oct 29 06:35:48 MDT 2004",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Manolache:2004:SAA,
  author =       "Sorin Manolache and Petru Eles and Zebo Peng",
  title =        "Schedulability analysis of applications with stochastic task execution times",
  journal =      j-TECS,
  volume =       "3",
  number =       "4",
  pages =        "706--735",
  month =        nov,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Oct 29 06:35:48 MDT 2004",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Serpanos:2004:EHS,
  author =       "Dimitrios N. Serpanos and Poluxeni Mountrouidou and Maria Gamvrili",
  title =        "Evaluation of hardware and software schedulers for embedded switches",
  journal =      j-TECS,
  volume =       "3",
  number =       "4",
  pages =        "736--759",
  month =        nov,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Oct 29 06:35:48 MDT 2004",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Lanotte:2004:IFH,
  author =       "Ruggero Lanotte and Andrea Maggiolo-Schettini and Simone Tini",
  title =        "Information flow in hybrid systems",
  journal =      j-TECS,
  volume =       "3",
  number =       "4",
  pages =        "760--799",
  month =        nov,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Oct 29 06:35:48 MDT 2004",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Liu:2004:MBA,
  author =       "Donggang Liu and Peng Ning",
  title =        "Multilevel {$ \mu $TESLA}: {Broadcast} authentication for distributed sensor networks",
  journal =      j-TECS,
  volume =       "3",
  number =       "4",
  pages =        "800--836",
  month =        nov,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Oct 29 06:35:48 MDT 2004",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chang:2004:RTG,
  author =       "Li-Pin Chang and Tei-Wei Kuo and Shi-Wu Lo",
  title =        "Real-time garbage collection for flash-memory storage systems of real-time embedded systems",
  journal =      j-TECS,
  volume =       "3",
  number =       "4",
  pages =        "837--863",
  month =        nov,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Oct 29 06:35:48 MDT 2004",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Mueller:2005:ISI,
  author =       "Frank Mueller and Per Stenstr{\"o}m",
  title =        "Introduction to the special issue",
  journal =      j-TECS,
  volume =       "4",
  number =       "1",
  pages =        "1--2",
  month =        feb,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Mar 24 15:48:07 MST 2005",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Krishnaswamy:2005:DCB,
  author =       "Arvind Krishnaswamy and Rajiv Gupta",
  title =        "Dynamic coalescing for 16-bit instructions",
  journal =      j-TECS,
  volume =       "4",
  number =       "1",
  pages =        "3--37",
  month =        feb,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Mar 24 15:48:07 MST 2005",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Corliss:2005:IED,
  author =       "Marc L. Corliss and E. Christopher Lewis and Amir Roth",
  title =        "The implementation and evaluation of dynamic code decompression using {DISE}",
  journal =      j-TECS,
  volume =       "4",
  number =       "1",
  pages =        "38--72",
  month =        feb,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Mar 24 15:48:07 MST 2005",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Dhurjati:2005:MSG,
  author =       "Dinakar Dhurjati and Sumant Kowshik and Vikram Adve and Chris Lattner",
  title =        "Memory safety without garbage collection for embedded applications",
  journal =      j-TECS,
  volume =       "4",
  number =       "1",
  pages =        "73--111",
  month =        feb,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Mar 24 15:48:07 MST 2005",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Pop:2005:SDF,
  author =       "Paul Pop and Petru Eles and Zebo Peng",
  title =        "Schedulability-driven frame packing for multicluster distributed embedded systems",
  journal =      j-TECS,
  volume =       "4",
  number =       "1",
  pages =        "112--140",
  month =        feb,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Mar 24 15:48:07 MST 2005",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Swaminathan:2005:PBE,
  author =       "Vishnu Swaminathan and Krishnendu Chakrabarty",
  title =        "Pruning-based, energy-optimal, deterministic {I/O} device scheduling for hard real-time systems",
  journal =      j-TECS,
  volume =       "4",
  number =       "1",
  pages =        "141--167",
  month =        feb,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Mar 24 15:48:07 MST 2005",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chiou:2005:SAS,
  author =       "Lih-yih Chiou and Swarup Bhunia and Kaushik Roy",
  title =        "Synthesis of application-specific highly efficient multi-mode cores for embedded systems",
  journal =      j-TECS,
  volume =       "4",
  number =       "1",
  pages =        "168--188",
  month =        feb,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Mar 24 15:48:07 MST 2005",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zambreno:2005:SOA,
  author =       "Joseph Zambreno and Alok Choudhary and Rahul Simha and Bhagi Narahari and Nasir Memon",
  title =        "{SAFE-OPS}: an approach to embedded software security",
  journal =      j-TECS,
  volume =       "4",
  number =       "1",
  pages =        "189--210",
  month =        feb,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Mar 24 15:48:07 MST 2005",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Kwon:2005:OVA,
  author =       "Woo-Cheol Kwon and Taewhan Kim",
  title =        "Optimal voltage allocation techniques for dynamically variable voltage processors",
  journal =      j-TECS,
  volume =       "4",
  number =       "1",
  pages =        "211--230",
  month =        feb,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Mar 24 15:48:07 MST 2005",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Tan:2005:EME,
  author =       "T. K. Tan and A. Raghunathan and N. K. Jha",
  title =        "Energy macromodeling of embedded operating systems",
  journal =      j-TECS,
  volume =       "4",
  number =       "1",
  pages =        "231--254",
  month =        feb,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Mar 24 15:48:07 MST 2005",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Shukla:2005:GES,
  author =       "Sandeep K. Shukla and Jean-Pierre Talpin",
  title =        "Guest editorial: {Special} issue on models and methodologies for co-design of embedded systems",
  journal =      j-TECS,
  volume =       "4",
  number =       "2",
  pages =        "225--227",
  month =        may,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jun 21 16:50:36 MDT 2005",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Cachera:2005:VSP,
  author =       "David Cachera and Katell Morin-Allory",
  title =        "Verification of safety properties for parameterized regular systems",
  journal =      j-TECS,
  volume =       "4",
  number =       "2",
  pages =        "228--266",
  month =        may,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jun 21 16:50:36 MDT 2005",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chouali:2005:PPM,
  author =       "S. Chouali and J. Julliand and P.-A. Masson and F. Bellegarde",
  title =        "{PLTL}-partitioned model checking for reactive systems under fairness assumptions",
  journal =      j-TECS,
  volume =       "4",
  number =       "2",
  pages =        "267--301",
  month =        may,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jun 21 16:50:36 MDT 2005",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Gardner:2005:CCS,
  author =       "William B. Gardner",
  title =        "Converging {CSP} specifications and {C++} programming via selective formalism",
  journal =      j-TECS,
  volume =       "4",
  number =       "2",
  pages =        "302--330",
  month =        may,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jun 21 16:50:36 MDT 2005",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ziller:2005:CSS,
  author =       "Roberto Ziller and Klaus Schneider",
  title =        "Combining supervisor synthesis and model checking",
  journal =      j-TECS,
  volume =       "4",
  number =       "2",
  pages =        "331--362",
  month =        may,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jun 21 16:50:36 MDT 2005",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zhang:2005:HCC,
  author =       "Chuanjun Zhang and Frank Vahid and Walid Najjar",
  title =        "A highly configurable cache for low energy embedded systems",
  journal =      j-TECS,
  volume =       "4",
  number =       "2",
  pages =        "363--387",
  month =        may,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jun 21 16:50:36 MDT 2005",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Kadayif:2005:DSO,
  author =       "I. Kadayif and M. Kandemir",
  title =        "Data space-oriented tiling for enhancing locality",
  journal =      j-TECS,
  volume =       "4",
  number =       "2",
  pages =        "388--414",
  month =        may,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jun 21 16:50:36 MDT 2005",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Muresan:2005:ICM,
  author =       "Radu Muresan and Catherine Gebotys",
  title =        "Instantaneous current modeling in a complex {VLIW} processor core",
  journal =      j-TECS,
  volume =       "4",
  number =       "2",
  pages =        "415--451",
  month =        may,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jun 21 16:50:36 MDT 2005",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Petrov:2005:RCF,
  author =       "Peter Petrov and Alex Orailoglu",
  title =        "A reprogrammable customization framework for efficient branch resolution in embedded processors",
  journal =      j-TECS,
  volume =       "4",
  number =       "2",
  pages =        "452--468",
  month =        may,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jun 21 16:50:36 MDT 2005",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Burns:2005:E,
  author =       "Alan Burns",
  title =        "Editorial",
  journal =      j-TECS,
  volume =       "4",
  number =       "3",
  pages =        "469--471",
  month =        aug,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Sep 17 15:05:12 MDT 2005",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Sangiovanni-Vincentelli:2005:OES,
  author =       "Alberto L. Sangiovanni-Vincentelli and Alessandro Pinto",
  title =        "An overview of embedded system design education at {Berkeley}",
  journal =      j-TECS,
  volume =       "4",
  number =       "3",
  pages =        "472--499",
  month =        aug,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Sep 17 15:05:12 MDT 2005",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Koopman:2005:UES,
  author =       "Philip Koopman and Howie Choset and Rajeev Gandhi and Bruce Krogh and Diana Marculescu and Priya Narasimhan and Joann M. Paul and Ragunathan Rajkumar and Daniel Siewiorek and Asim Smailagic and Peter Steenkiste and Donald E. Thomas and Chenxi Wang",
  title =        "Undergraduate embedded system education at {Carnegie Mellon}",
  journal =      j-TECS,
  volume =       "4",
  number =       "3",
  pages =        "500--528",
  month =        aug,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Sep 17 15:05:12 MDT 2005",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Verbauwhede:2005:SES,
  author =       "Ingrid Verbauwhede and Patrick Schaumont",
  title =        "Skiing the embedded systems mountain",
  journal =      j-TECS,
  volume =       "4",
  number =       "3",
  pages =        "529--548",
  month =        aug,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Sep 17 15:05:12 MDT 2005",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Sztipanovits:2005:IES,
  author =       "Janos Sztipanovits and Gautam Biswas and Ken Frampton and Aniruddha Gokhale and Larry Howard and Gabor Karsai and T. John Koo and Xenofon Koutsoukos and Douglas C. Schmidt",
  title =        "Introducing embedded software and systems education and advanced learning technology in an engineering curriculum",
  journal =      j-TECS,
  volume =       "4",
  number =       "3",
  pages =        "549--568",
  month =        aug,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Sep 17 15:05:12 MDT 2005",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Seviora:2005:CES,
  author =       "Rudolph E.
Seviora",
  title =        "A curriculum for embedded system engineering",
  journal =      j-TECS,
  volume =       "4",
  number =       "3",
  pages =        "569--586",
  month =        aug,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Sep 17 15:05:12 MDT 2005",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

%%% Author-list corrections in the next entry: ``M. Torngren'' now carries
%%% its umlaut, matching the spelling ``Martin T{\"o}rngren'' used in entry
%%% Grimheden:2005:WES (presumably the same author), and ``G. Folher'' is
%%% respelled ``G. Fohler''.  NOTE(review): confirm both spellings against
%%% the published article.
@Article{Caspi:2005:GGC,
  author =       "P. Caspi and A. Sangiovanni-Vincentelli and L. Almeida and A. Benveniste and B. Bouyssounouse and G. Buttazzo and I. Crnkovic and W. Damm and J. Engblom and G. Fohler and M. Garcia-Valls and H. Kopetz and Y. Lakhnech and F. Laroussinie and L. Lavagno and G. Lipari and F. Maraninchi and Ph. Peti and J. de la Puente and N. Scaife and J. Sifakis and R. de Simone and M. T{\"o}rngren and P. Ver{\'\i}ssimo and A. J. Wellings and R. Wilhelm and T. Willemse and W. Yi",
  title =        "Guidelines for a graduate curriculum on embedded software and systems",
  journal =      j-TECS,
  volume =       "4",
  number =       "3",
  pages =        "587--611",
  month =        aug,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Sep 17 15:05:12 MDT 2005",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Huang:2005:ESC, author = "Tai-Yi Huang and Chung-Ta King and Youn-Long Steve Lin and Yin-Tsung Hwang", title = "The embedded software consortium of {Taiwan}", journal = j-TECS, volume = "4", number = "3", pages = "612--632", month = aug, year = "2005", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Sep 17 15:05:12 MDT 2005", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Grimheden:2005:WES, author = "Martin Grimheden and Martin T{\"o}rngren", title = "What is embedded systems and how should it be taught?---results from a didactic analysis", journal = j-TECS, volume = "4", number = "3", pages = "633--651", month = aug, year = "2005", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Sep 17 15:05:12 MDT 2005", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhang:2005:RDC, author = "Wei Zhang and Mahmut Kandemir and Mustafa Karakoy and Guangyu Chen", title = "Reducing data cache leakage energy using a compiler-based approach", journal = j-TECS, volume = "4", number = "3", pages = "652--678", month = aug, year = "2005", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Sep 17 15:05:12 MDT 2005", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kim:2005:DDC, author = "Hyung Seok Kim and Tarek F. Abdelzaher and Wook Hyun Kwon", title = "Dynamic delay-constrained minimum-energy dissemination in wireless sensor networks", journal = j-TECS, volume = "4", number = "3", pages = "679--706", month = aug, year = "2005", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Sep 17 15:05:12 MDT 2005", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Alur:2005:P, author = "Rajeev Alur and Insup Lee", title = "Preface", journal = j-TECS, volume = "4", number = "4", pages = "707--707", month = nov, year = "2005", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Feb 16 10:59:18 MST 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Tardieu:2005:LE, author = "Olivier Tardieu and Robert de Simone", title = "Loops in {ESTEREL}", journal = j-TECS, volume = "4", number = "4", pages = "708--750", month = nov, year = "2005", CODEN = "????", DOI = "https://doi.org/10.1145/1113830.1113832", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Feb 16 10:59:18 MST 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Regehr:2005:ESO, author = "John Regehr and Alastair Reid and Kirk Webb", title = "Eliminating stack overflow by abstract interpretation", journal = j-TECS, volume = "4", number = "4", pages = "751--778", month = nov, year = "2005", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Feb 16 10:59:18 MST 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Tripakis:2005:TDT, author = "Stavros Tripakis and Christos Sofronis and Paul Caspi and Adrian Curic", title = "Translating discrete-time {Simulink} to {Lustre}", journal = j-TECS, volume = "4", number = "4", pages = "779--818", month = nov, year = "2005", CODEN = "????", DOI = "https://doi.org/10.1145/1113830.1113834", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Feb 16 10:59:18 MST 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kadayif:2005:CDH, author = "I. Kadayif and M. Kandemir and G. Chen and N. Vijaykrishnan and M. J. Irwin and A. 
Sivasubramaniam", title = "Compiler-directed high-level energy estimation and optimization", journal = j-TECS, volume = "4", number = "4", pages = "819--850", month = nov, year = "2005", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Feb 16 10:59:18 MST 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hu:2005:ADR, author = "J. Hu and M. Kandemir and N. Vijaykrishnan and M. J. Irwin", title = "Analyzing data reuse for cache reconfiguration", journal = j-TECS, volume = "4", number = "4", pages = "851--876", month = nov, year = "2005", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Feb 16 10:59:18 MST 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{He:2005:RFL, author = "Tian He and Chengdu Huang and Brian M. Blum and John A. Stankovic and Tarek F. Abdelzaher", title = "Range-free localization and its impact on large scale sensor networks", journal = j-TECS, volume = "4", number = "4", pages = "877--906", month = nov, year = "2005", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Feb 16 10:59:18 MST 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Gaujal:2005:SPA, author = "Bruno Gaujal and Nicolas Navet and Cormac Walsh", title = "Shortest-path algorithms for real-time scheduling of {FIFO} tasks with minimal energy use", journal = j-TECS, volume = "4", number = "4", pages = "907--933", month = nov, year = "2005", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Feb 16 10:59:18 MST 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Bartolini:2005:OIC, author = "S. Bartolini and C. A. Prete", title = "Optimizing instruction cache performance of embedded systems", journal = j-TECS, volume = "4", number = "4", pages = "934--965", month = nov, year = "2005", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Feb 16 10:59:18 MST 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhang:2006:RDL, author = "W. Zhang and Y.-F. Tsai and D. Duarte and N. Vijaykrishnan and M. Kandemir and M. J. Irwin", title = "Reducing dynamic and leakage energy in {VLIW} architectures", journal = j-TECS, volume = "5", number = "1", pages = "1--28", month = feb, year = "2006", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu May 18 08:17:05 MDT 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. 
Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Coussy:2006:FMH, author = "Philippe Coussy and Emmanuel Casseau and Pierre Bomel and Adel Baganne and Eric Martin", title = "A formal method for hardware {IP} design and integration under {I/O} and timing constraints", journal = j-TECS, volume = "5", number = "1", pages = "29--53", month = feb, year = "2006", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu May 18 08:17:05 MDT 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Varea:2006:DFN, author = "Mauricio Varea and Bashir M. Al-Hashimi and Luis A. Cort{\'e}s and Petru Eles and Zebo Peng", title = "{Dual Flow Nets}: {Modeling} the control\slash data-flow relation in embedded systems", journal = j-TECS, volume = "5", number = "1", pages = "54--81", month = feb, year = "2006", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu May 18 08:17:05 MDT 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{AbouGhazaleh:2006:COS, author = "Nevine AbouGhazaleh and Daniel Moss{\'e} and Bruce R. 
Childers and Rami Melhem", title = "Collaborative operating system and compiler power management for real-time applications", journal = j-TECS, volume = "5", number = "1", pages = "82--115", month = feb, year = "2006", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu May 18 08:17:05 MDT 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Dean:2006:STI, author = "Alexander G. Dean", title = "Software thread integration for embedded system display applications", journal = j-TECS, volume = "5", number = "1", pages = "116--151", month = feb, year = "2006", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu May 18 08:17:05 MDT 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Alur:2006:PAR, author = "Rajeev Alur and Thao Dang and Franjo Ivan{\v{c}}i{\'c}", title = "Predicate abstraction for reachability analysis of hybrid systems", journal = j-TECS, volume = "5", number = "1", pages = "152--199", month = feb, year = "2006", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu May 18 08:17:05 MDT 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Seth:2006:FFA, author = "Kiran Seth and Aravindh Anantaraman and Frank Mueller and Eric Rotenberg", title = "{FAST}: {Frequency-Aware Static Timing} analysis", journal = j-TECS, volume = "5", number = "1", pages = "200--224", month = feb, year = "2006", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu May 18 08:17:05 MDT 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chen:2006:RCS, author = "G. Chen and M. Kandemir and M. J. Irwin and J. Ramanujam", title = "Reducing code size through address register assignment", journal = j-TECS, volume = "5", number = "1", pages = "225--258", month = feb, year = "2006", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu May 18 08:17:05 MDT 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Jerraya:2006:GEC, author = "Ahmed Jerraya and Trevor Mudge", title = "Guest editorial: {Concurrent} hardware and software design for multiprocessor {SoC}", journal = j-TECS, volume = "5", number = "2", pages = "259--262", month = may, year = "2006", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Aug 23 05:26:43 MDT 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Xu:2006:DMA, author = "Jiang Xu and Wayne Wolf and Joerg Henkel and Srimat Chakradhar", title = "A design methodology for application-specific networks-on-chip", journal = j-TECS, volume = "5", number = "2", pages = "263--280", month = may, year = "2006", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Aug 23 05:26:43 MDT 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kangas:2006:UBM, author = "Tero Kangas and Petri Kukkala and Heikki Orsila and Erno Salminen and Marko H{\"a}nnik{\"a}inen and Timo D. H{\"a}m{\"a}l{\"a}inen and Jouni Riihim{\"a}ki and Kimmo Kuusilinna", title = "{UML}-based multiprocessor {SoC} design framework", journal = j-TECS, volume = "5", number = "2", pages = "281--320", month = may, year = "2006", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Aug 23 05:26:43 MDT 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hua:2006:EEE, author = "Shaoxiong Hua and Gang Qu and Shuvra S. 
Bhattacharyya", title = "Energy-efficient embedded software implementation on multiprocessor system-on-chip with multiple voltages", journal = j-TECS, volume = "5", number = "2", pages = "321--341", month = may, year = "2006", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Aug 23 05:26:43 MDT 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hessel:2006:SRA, author = "Fabiano Hessel and Vitor M. {Da Rosa} and Carlos Eduardo Reif and C{\'e}sar Marcon and Tatiana {Gadelha Serra Dos Santos}", title = "Scheduling refinement in abstract {RTOS} models", journal = j-TECS, volume = "5", number = "2", pages = "342--354", month = may, year = "2006", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Aug 23 05:26:43 MDT 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ou:2006:DSE, author = "Jingzhao Ou and Viktor K. Prasanna", title = "Design space exploration using arithmetic-level hardware--software cosimulation for configurable multiprocessor platforms", journal = j-TECS, volume = "5", number = "2", pages = "355--382", month = may, year = "2006", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Aug 23 05:26:43 MDT 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Loghi:2006:CCT, author = "Mirko Loghi and Massimo Poncino and Luca Benini", title = "Cache coherence tradeoffs in shared-memory {MPSoCs}", journal = j-TECS, volume = "5", number = "2", pages = "383--407", month = may, year = "2006", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Aug 23 05:26:43 MDT 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lapalme:2006:NEE, author = "James Lapalme and El Mostapha Aboulhamid and Gabriela Nicolescu", title = "A new efficient {EDA} tool design methodology", journal = j-TECS, volume = "5", number = "2", pages = "408--430", month = may, year = "2006", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Aug 23 05:26:43 MDT 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Reshadi:2006:RFI, author = "Mehrdad Reshadi and Nikil Dutt and Prabhat Mishra", title = "A retargetable framework for instruction-set architecture simulation", journal = j-TECS, volume = "5", number = "2", pages = "431--452", month = may, year = "2006", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Aug 23 05:26:43 MDT 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Memik:2006:ENP, author = "Gokhan Memik and William H. Mangione-Smith", title = "Evaluating {Network Processors} using {NetBench}", journal = j-TECS, volume = "5", number = "2", pages = "453--471", month = may, year = "2006", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Aug 23 05:26:43 MDT 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Udayakumaran:2006:DAS, author = "Sumesh Udayakumaran and Angel Dominguez and Rajeev Barua", title = "Dynamic allocation for scratch-pad memory using compile-time decisions", journal = j-TECS, volume = "5", number = "2", pages = "472--511", month = may, year = "2006", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Aug 23 05:26:43 MDT 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wu:2006:EEU, author = "Haisang Wu and Binoy Ravindran and E. 
Douglas Jensen and Peng Li", title = "Energy-efficient, utility accrual scheduling under resource constraints for mobile embedded systems", journal = j-TECS, volume = "5", number = "3", pages = "513--542", month = aug, year = "2006", CODEN = "????", DOI = "https://doi.org/10.1145/1165780.1165781", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Oct 11 06:45:18 MDT 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We present an energy-efficient, utility accrual, real-time scheduling algorithm called ReUA. ReUA considers an application model where activities are subject to time/utility function time constraints, mutual exclusion constraints on shared non-CPU resources, and statistical performance requirements on individual activity timeliness behavior. The algorithm targets mobile embedded systems where {\em system-level\/} energy consumption is also a major concern. For such a model, we consider the scheduling objectives of (1) satisfying the statistical performance requirements and (2) maximizing the system-level energy efficiency, while respecting resource constraints. Since the problem is NP-hard, ReUA allocates CPU cycles using statistical properties of application cycle demands, and heuristically computes schedules with a polynomial time cost. We analytically establish several timeliness and nontimeliness properties of the algorithm. Further, our simulation experiments illustrate ReUA's effectiveness and superiority.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Luo:2006:EEI, author = "Liqian Luo and Tarek F. Abdelzaher and Tian He and John A. 
Stankovic", title = "{EnviroSuite}: an environmentally immersive programming framework for sensor networks", journal = j-TECS, volume = "5", number = "3", pages = "543--576", month = aug, year = "2006", CODEN = "????", DOI = "https://doi.org/10.1145/1165780.1165782", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Oct 11 06:45:18 MDT 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Sensor networks open a new frontier for embedded-distributed computing. Paradigms for sensor network programming-in-the-large have been identified as a significant challenge toward developing large-scale applications. Classical programming languages are too low-level. This paper presents the design, implementation, and evaluation of EnviroSuite, a programming framework that introduces a new paradigm, called environmentally immersive programming, to abstract distributed interactions with the environment. Environmentally immersive programming refers to an object-based programming model in which individual objects represent physical elements in the external environment. It allows the programmer to think directly in terms of environmental abstractions. EnviroSuite provides language primitives for environmentally immersive programming that map transparently into a support library of distributed algorithms for tracking and environmental monitoring. We show how nesC code of realistic applications is significantly simplified using EnviroSuite and demonstrate the resulting system performance on Mica2 and XSM platforms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Gebotys:2006:SMC, author = "Catherine H. 
Gebotys", title = "A split-mask countermeasure for low-energy secure embedded systems", journal = j-TECS, volume = "5", number = "3", pages = "577--612", month = aug, year = "2006", CODEN = "????", DOI = "https://doi.org/10.1145/1165780.1165783", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Oct 11 06:45:18 MDT 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Future wireless embedded devices will be increasingly powerful, supporting many more applications, including one of the most crucial---security. Although many embedded devices offer more resistance to bus-probing attacks because of their compact size, susceptibility to power or electromagnetic analysis attacks must be analyzed. This paper presents a new split-mask countermeasure to thwart low-order differential power analysis (DPA) and differential EM analysis (DEMA). For the first time, real-power and EM measurements are used to analyze the difficulty of launching new third-order DPA and DEMA attacks on a popular low-energy 32-bit embedded ARM processor. Results show that the new split-mask countermeasure provides increased security without large overheads of energy dissipation, compared to previous research. With the emergence of security applications in PDAs, cell phones, and other embedded devices, low-energy countermeasures for resistance to low-order DPA/DEMA is crucial for supporting future enabled wireless internet.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhuang:2006:PLS, author = "Xiaotong Zhuang and Santosh Pande", title = "Parallelizing load\slash stores on dual-bank memory embedded processors", journal = j-TECS, volume = "5", number = "3", pages = "613--657", month = aug, year = "2006", CODEN = "????", DOI = "https://doi.org/10.1145/1165780.1165784", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Oct 11 06:45:18 MDT 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Many modern embedded processors such as DSPs support partitioned memory banks (also called X--Y memory or dual-bank memory) along with parallel load/store instructions to achieve higher code density and performance. In order to effectively utilize the parallel load/store instructions, the compiler must partition the memory-resident values and assign them to X or Y bank. This paper gives a postregister allocation solution to merge the generated load/store instructions into their parallel counterparts. Simultaneously, our framework performs allocation of values to X or Y memory banks. We first remove as many load/stores and register--register moves as possible through an excellent iterated coalescing based register allocator by Appel and George [1996]. We then attempt to parallelize the generated load/stores using a multipass approach. The basic phase of our approach attempts the merger of load/stores without duplication and web splitting. We model this problem as a graph-coloring problem in which each value is colored as either X or Y. We then construct a motion scheduling graph (MSG), based on the range of motion for each load/store instruction. MSG reflects potential instructions that could be merged. We propose a notion of pseudofixed boundaries so that the load/store movement is less affected by register dependencies. 
We prove that the coloring problem for MSG is NP-complete and solve it with two different heuristic algorithms with different complexity. We then propose a two-level iterative process to attempt instruction duplication, variable duplication, web splitting, and local conflict elimination to effectively merge the remaining load/stores. Finally, we clean up some multiple-aliased load/stores. To improve the performance, we combine profiling information with each stage coupled with some modifications to the algorithm. We show that our framework results in parallelization of a large number of load/stores without much growth in data and code segments. The average speedup for our optimization pass reaches roughly 13\% if no profile information is available and 17\% with profile information. The average code and data segment growth is controlled within 13\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Jones:2006:RPW, author = "Alex K. Jones and Raymond Hoare and Dara Kusic and Gayatri Mehta and Josh Fazekas and John Foster", title = "Reducing power while increasing performance with {SuperCISC}", journal = j-TECS, volume = "5", number = "3", pages = "658--686", month = aug, year = "2006", CODEN = "????", DOI = "https://doi.org/10.1145/1165780.1165785", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Oct 11 06:45:18 MDT 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Multiprocessor Systems on Chips (MPSoCs) have become a popular architectural technique to increase performance. However, MPSoCs may lead to undesirable power consumption characteristics for computing systems that have strict power budgets, such as PDAs, mobile phones, and notebook computers. 
This paper presents the super-complex instruction-set computing (SuperCISC) Embedded Processor Architecture and, in particular, investigates performance and power consumption of this device compared to traditional processor architecture-based execution. SuperCISC is a heterogeneous, multicore processor architecture designed to exceed performance of traditional embedded processors while maintaining a reduced power budget compared to low-power embedded processors. At the heart of the SuperCISC processor is a multicore VLIW (Very Large Instruction Word) containing several homogeneous execution cores/functional units. In addition, complex and heterogeneous combinational hardware function cores are tightly integrated to the core VLIW engine providing an opportunity for improved performance and reduced energy consumption. Our SuperCISC processor core has been synthesized for both a 90-nm Stratix II Field Programmable Gate Array (FPGA) and a 160-nm standard cell Application-Specific Integrated Circuit (ASIC) fabrication process from OKI, each operating at approximately 167 MHz for the VLIW core. We examine several reasons for speedup and power improvement through the SuperCISC architecture, including predicated control flow, cycle compression, and a reduction in arithmetic power consumption, which we call power compression. Finally, testing our SuperCISC processor with multimedia and signal-processing benchmarks, we show how the SuperCISC processor can provide performance improvements ranging from 7X to 160X with an average of 60X, while also providing orders of magnitude of power improvements for the computational kernels. The power improvements for our benchmark kernels range from just over 40X to over 400X, with an average savings exceeding 130X. By combining these power and performance improvements, our total energy improvements all exceed 1000X. 
As these savings are limited to the computational kernels of the applications, which often consume approximately 90\% of the execution time, we expect our savings to approach the ideal application improvement of 10X.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Girault:2006:ARD, author = "Alain Girault and Xavier Nicollin and Marc Pouzet", title = "Automatic rate desynchronization of embedded reactive programs", journal = j-TECS, volume = "5", number = "3", pages = "687--717", month = aug, year = "2006", CODEN = "????", DOI = "https://doi.org/10.1145/1165780.1165786", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Oct 11 06:45:18 MDT 2006", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Many embedded reactive programs perform computations at different rates, while still requiring the overall application to satisfy very tight temporal constraints. We propose a method to automatically distribute programs such that the obtained parts can be run at different rates, which we call rate desynchronization. We consider general programs whose control structure is a finite state automaton and with a DAG of actions in each state. The motivation is to take into account long-duration tasks inside the programs: these are tasks whose execution time is long compared to the other computations in the application, and whose maximal execution rate is known and bounded. Merely scheduling such a long duration task at a slow rate would not work since the whole program would be slowed down if compiled into sequential code. It would thus be impossible to meet the temporal constraints, unless such long duration tasks could be desynchronized from the remaining computations. 
This is precisely what our method achieves: it distributes the initial program into several parts, so that the parts performing the slow computations can be run at an appropriate rate, therefore not impairing the global reaction time of the program. We present in detail our method, all the involved algorithms, and a small running example. We also compare our method with the related work.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Biswas:2006:MOP, author = "Surupa Biswas and Thomas Carley and Matthew Simpson and Bhuvan Middha and Rajeev Barua", title = "Memory overflow protection for embedded systems using run-time checks, reuse, and compression", journal = j-TECS, volume = "5", number = "4", pages = "719--752", month = nov, year = "2006", CODEN = "????", DOI = "https://doi.org/10.1145/1196636.1196637", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:20:45 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Embedded systems usually lack virtual memory and are vulnerable to memory overflow since they lack a mechanism to detect overflow or use swap space thereafter. We present a method to detect memory overflows using compiler-inserted software run-time checks. Its overheads in run-time and energy are 1.35 and 1.12\%, respectively. Detection of overflow allows system-specific remedial action. We also present techniques to grow the stack or heap segment after they overflow, into previously unutilized space, such as dead variables, free holes in the heap, and space freed by compressing live variables. These may avoid the out-of-memory error if the space recovered is enough to complete execution. 
The reuse methods are able to grow the stack or heap beyond its overflow by an amount that varies widely by application---the amount of recovered space ranges from 0.7 to 93.5\% of the combined stack and heap size.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "data compression; heap overflow; out-of-memory errors; reliability; reuse; run-time checks; stack overflow", } @Article{Higuera-Toledano:2006:HSD, author = "M. Teresa Higuera-Toledano", title = "Hardware support for detecting illegal references in a multiapplication real-time {Java} environment", journal = j-TECS, volume = "5", number = "4", pages = "753--772", month = nov, year = "2006", CODEN = "????", DOI = "https://doi.org/10.1145/1196636.1196638", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:20:45 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Our objective is to adapt the Java memory management to an embedded system, e.g., a wireless PDA executing concurrent multimedia applications within a single JVM. This paper provides software, and hardware-based solutions detecting both illegal references across the application memory spaces and dangling pointers within an application space. We give an approach to divide/share the memory among the applications executing concurrently in the system. We introduce and define application-specific memory, building upon the real-time specification for Java (RTSJ) from the real-time Java expert group. The memory model used in RTSJ imposes strict rules for assignment between memory areas, preventing the creation of dangling pointers, and thus maintaining the pointer safety of Java. 
Our implementation solution to ensure the checking of these rules before each assignment inserts write barriers that use a stack-based algorithm. This solution adversely affects both the performance and predictability of the RTSJ applications, which can be improved by using an existing hardware support.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "garbage collection; memory management; write barriers", } @Article{Winter:2006:TPC, author = "Victor L. Winter and Jason Beranek and Fares Fraij and Steve Roach and Greg Wickstrom", title = "A transformational perspective into the core of an abstract class loader for the {SSP}", journal = j-TECS, volume = "5", number = "4", pages = "773--818", month = nov, year = "2006", CODEN = "????", DOI = "https://doi.org/10.1145/1196636.1196639", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:20:45 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The SSP is a hardware implementation of a subset of the JVM for use in high-consequence embedded applications. In this context, a majority of the activities belonging to class loading, as it is defined in the specification of the JVM, can be performed statically. Static class loading has the net result of dramatically simplifying the design of the SSP, as well as increasing its performance. Because of the high consequence nature of its applications, strong evidence must be provided that all aspects of the SSP have been implemented correctly. This includes the class loader. This article explores the possibility of formally verifying a class loader for the SSP implemented in the strategic programming language TL. 
Specifically, an implementation of the core activities of an abstract class loader is presented and its verification in ACL2 is considered.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "HATS; higher-order rewriting; SSP; strategic programming; TL", } @Article{Kulkarni:2006:VVI, author = "Prasad Kulkarni and Wankang Zhao and Stephen Hines and David Whalley and Xin Yuan and Robert van Engelen and Kyle Gallivan and Jason Hiser and Jack Davidson and Baosheng Cai and Mark Bailey and Hwashin Moon and Kyunghwan Cho and Yunheung Paek", title = "{VISTA}: {VPO} interactive system for tuning applications", journal = j-TECS, volume = "5", number = "4", pages = "819--863", month = nov, year = "2006", CODEN = "????", DOI = "https://doi.org/10.1145/1196636.1196640", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:20:45 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Software designers face many challenges when developing applications for embedded systems. One major challenge is meeting the conflicting constraints of speed, code size, and power consumption. Embedded application developers often resort to hand-coded assembly language to meet these constraints since traditional optimizing compiler technology is usually of little help in addressing this challenge. The results are software systems that are not portable, less robust, and more costly to develop and maintain. Another limitation is that compilers traditionally apply the optimizations to a program in a fixed order. However, it has long been known that a single ordering of optimization phases will not produce the best code for every application. 
In fact, the smallest unit of compilation in most compilers is typically a function and the programmer has no control over the code improvement process other than setting flags to enable or disable certain optimization phases. This paper describes a new code improvement paradigm implemented in a system called VISTA that can help achieve the cost/performance trade-offs that embedded applications demand. The VISTA system opens the code improvement process and gives the application programmer, when necessary, the ability to finely control it. VISTA also provides support for finding effective sequences of optimization phases. This support includes the ability to interactively get static and dynamic performance information, which can be used by the developer to steer the code improvement process. This performance information is also internally used by VISTA for automatically selecting the best optimization sequence from several attempted. One such feature is the use of a genetic algorithm to search for the most efficient sequence based on specified fitness criteria. We include a number of experimental results that evaluate the effectiveness of using a genetic algorithm in VISTA to find effective optimization phase sequences.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "genetic algorithms; interactive compilation; phase ordering; user-directed code improvement", } @Article{Ottoni:2006:OAU, author = "Desiree Ottoni and Guilherme Ottoni and Guido Araujo and Rainer Leupers", title = "Offset assignment using simultaneous variable coalescing", journal = j-TECS, volume = "5", number = "4", pages = "864--883", month = nov, year = "2006", CODEN = "????", DOI = "https://doi.org/10.1145/1196636.1196641", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:20:45 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The generation of efficient addressing code is a central problem in compiling for processors with restricted addressing modes, like digital signal processors (DSPs). Offset assignment (OA) is the problem of allocating scalar variables to memory, so as to minimize the need of addressing instructions. This problem is called simple offset assignment (SOA) when a single address register is available, and general offset assignment (GOA) when more address registers are used. This paper shows how variables' liveness information can be used to dramatically reduce the addressing instructions required to access local variables on the program stack. Two techniques that make effective use of variable coalescing to solve SOA and GOA are described, namely coalescing SOA (CSOA) and coalescing GOA (CGOA). In addition, a thorough comparison between these algorithms and others described in the literature is presented. The experimental results, when compiling MediaBench benchmark programs with the LANCE compiler, reveal a very significant improvement of the proposed techniques over the other available solutions to the problem.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "address registers; autoincrement addressing modes; DSPs; register allocation; stack offset assignment; variable coalescing", } @Article{Whalley:2007:GE, author = "David Whalley", title = "Guest {Editorial}", journal = j-TECS, volume = "6", number = "1", pages = "1:1--1:??", month = feb, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1210268.1216577", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:20:58 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "1", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kumar:2007:ESI, author = "Nagendra J. Kumar and Vasanth Asokan and Siddhartha Shivshankar and Alexander G. Dean", title = "Efficient software implementation of embedded communication protocol controllers using asynchronous software thread integration with time- and space-efficient procedure calls", journal = j-TECS, volume = "6", number = "1", pages = "2:1--2:??", month = feb, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1210268.1210270", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:20:58 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The overhead of context switching limits efficient scheduling of multiple concurrent threads on a uniprocessor when real-time requirements exist. A software-implemented protocol controller may be crippled by this problem. 
The available idle time may be too short to recover through context switching, so only the primary thread can execute during message activity, slowing the secondary threads and potentially missing deadlines. Asynchronous software thread integration (ASTI) uses coroutine calls and integration, letting threads make independent progress efficiently, and reducing the needed context switches. We demonstrate the methods with a software implementation of an automotive communication protocol (J1850) and several secondary threads.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "2", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "asynchronous software thread integration; fine-grain concurrency; hardware to software migration; J1850; software-implemented communication protocol controllers", } @Article{Zhuang:2007:PEP, author = "Xiaotong Zhuang and Santosh Pande", title = "Power-efficient prefetching for embedded processors", journal = j-TECS, volume = "6", number = "1", pages = "3:1--3:??", month = feb, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1210268.1210271", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:20:58 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Because of stringent power constraints, aggressive latency-hiding approaches, such as prefetching, are absent in the state-of-the-art embedded processors. There are two main reasons that make prefetching power inefficient. First, compiler-inserted prefetch instructions increase code size and, therefore, could increase I-cache power. Second, inaccurate prefetching (especially for hardware prefetching) leads to high D-cache power consumption because of useless accesses. 
In this work, we show that it is possible to support power-efficient prefetching through bit-differential offset assignment. We target the prefetching of relocatable stack variables with a high degree of precision. By assigning the offsets of stack variables in such a way that most consecutive addresses differ by 1 bit, we can prefetch them with compact prefetch instructions to save I-cache power. The compiler first generates an access graph of consecutive memory references and then attempts a layout of the memory locations in the smallest hypercube. Each dimension of the hypercube represents a 1-bit differential addressing. The embedding is carried out in as compact a hypercube as possible in order to save memory space. Each load/store instruction carries a hint regarding prefetching the next memory reference by encoding its differential address with respect to the current one. To reduce D-cache power cost, we further attempt to assign offsets so that most of the consecutive accesses map to the same cache line. Our prefetching is done using a one entry line buffer [Wilson et al. 1996]. Consequently, many look-ups in D-cache reduce to incremental ones. This results in D-cache activity reduction and power savings. Our prefetcher requires both compiler and hardware support. In this paper, we provide implementation on the processor model close to ARM with small modification to the ISA. We tackle issues such as out-of-order commit, predication, and speculation through simple modifications to the processor pipeline on noncritical paths. Our goal in this work is to boost performance while maintaining/lowering power consumption. Our results show 12\% speedup and slight power reduction. The runtime virtual space loss for stack and static data is about 11.8\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "3", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "bit-differential addressing; data prefetching; embedded processors; offset assignment", } @Article{Contreras:2007:XPP, author = "Gilberto Contreras and Margaret Martonosi and Jinzhang Peng and Guei-Yuan Lueh and Roy Ju", title = "The {XTREM} power and performance simulator for the {Intel XScale} core: {Design} and experiences", journal = j-TECS, volume = "6", number = "1", pages = "4:1--4:??", month = feb, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1210268.1210272", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:20:58 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Managing power concerns in microprocessors has become a pressing research problem across the domains of computer architecture, CAD, and compilers. As a result, several parameterized cycle-level power simulators have been introduced. While these simulators can be quite useful for microarchitectural studies, their generality limits how accurate they can be for any one chip family. Furthermore, their hardware focus means that they do not explicitly enable studying the interaction of different software layers, such as Java applications and their underlying runtime system software. This paper describes and evaluates XTREM, a power-simulation tool tailored for the Intel XScale microarchitecture. In building XTREM, our goals were to develop a microarchitecture simulator that, while still offering size parameterizations for cache and other structures, more accurately reflected a realistic processor pipeline. We present a detailed set of validations based on multimeter power measurements and hardware performance counter sampling. XTREM exhibits an average performance error of only 6.5\% and an even smaller average power error: 4\%. 
The paper goes on to present an application study enabled by the simulator. Namely, we use XTREM to produce an energy consumption breakdown for Java CDC and CLDC applications. Our simulator measurements indicate that a large percentage of the total energy consumption (up to 35\%) is devoted to the virtual machine's support functions.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "4", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "Intel XScale technology; Java; power measurements; power modeling", } @Article{DeSutter:2007:LTC, author = "Bjorn {De Sutter} and Ludo {Van Put} and Dominique Chanet and Bruno {De Bus} and Koen {De Bosschere}", title = "Link-time compaction and optimization of {ARM} executables", journal = j-TECS, volume = "6", number = "1", pages = "5:1--5:??", month = feb, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1210268.1210273", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:20:58 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The overhead in terms of code size, power consumption, and execution time caused by the use of precompiled libraries and separate compilation is often unacceptable in the embedded world, where real-time constraints, battery life-time, and production costs are of critical importance. In this paper, we present our link-time optimizer for the ARM architecture. We discuss how we can deal with the peculiarities of the ARM architecture related to its visible program counter and how the introduced overhead can to a large extent be eliminated. Our link-time optimizer is evaluated with four tool chains, two proprietary ones from ARM and two open ones based on GNU GCC. 
When used with proprietary tool chains from ARM Ltd., our link-time optimizer achieved average code size reductions of 16.0 and 18.5\%, while the programs have become 12.8 and 12.3\% faster, and 10.7 to 10.1\% more energy efficient. Finally, we show how the incorporation of link-time optimization in tool chains may influence library interface design.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "5", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "compaction; linker; optimization; performance", } @Article{Panainte:2007:MCR, author = "Elena Moscu Panainte and Koen Bertels and Stamatis Vassiliadis", title = "The {Molen} compiler for reconfigurable processors", journal = j-TECS, volume = "6", number = "1", pages = "6:1--6:??", month = feb, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1210268.1210274", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:20:58 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this paper, we describe the compiler developed to target the Molen reconfigurable processor and programming paradigm. The compiler automatically generates optimized binary code for C applications, based on pragma annotation of the code executed on the reconfigurable hardware. For the IBM PowerPC 405 processor included in the Virtex II Pro platform FPGA, we implemented code generation, register, and stack frame allocation following the PowerPC EABI (embedded application binary interface). The PowerPC backend has been extended to generate the appropriate instructions for the reconfigurable hardware and data transfer, taking into account the information of the specific hardware implementations and system. 
Starting with an annotated C application, a complete design flow has been integrated to generate the executable bitstream for the reconfigurable processor. The flexible design of the proposed infrastructure allows to consider the special features of the reconfigurable architectures. In order to hide the reconfiguration latencies, we implemented an instruction-scheduling algorithm for the dynamic hardware configuration instructions. The algorithm schedules, in advance, the hardware configuration instructions, taking into account the conflicts for the reconfigurable hardware resources (FPGA area) between the hardware operations. To verify the Molen compiler, we used the multimedia video frame M-JPEG encoder of which the extended discrete cosine transform (DCT*) function was mapped on the FPGA. We obtained an overall speedup of 2.5 (about 84\% efficiency over the maximal theoretical speedup of 2.96). The performance efficiency is achieved using automatically generated nonoptimized DCT* hardware implementation. The instruction-scheduling algorithm has been tested for DCT, quantization, and VLC operations. Based on simulation results, we determine that, while a simple scheduling produces a significant performance decrease, our proposed scheduling contributes for up to $ 16 \times $ M-JPEG encoder speedup.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "6", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "FPGA; instruction scheduling; reconfigurable computing", } @Article{Tan:2007:TAP, author = "Yudong Tan and Vincent Mooney", title = "Timing analysis for preemptive multitasking real-time systems with caches", journal = j-TECS, volume = "6", number = "1", pages = "7:1--7:??", month = feb, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1210268.1210275", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:20:58 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this paper, we propose an approach to estimate the worst-case response time (WCRT) of each task in a preemptive multitasking single-processor real-time system utilizing an L1 cache. The approach combines intertask cache-eviction analysis and intratask cache-access analysis to estimate the number of cache lines that can possibly be evicted by the preempting task and also be accessed again by the preempted task after preemptions (thus requiring the preempted task to reload the cache line(s)). This cache-reload delay caused by preempting task(s) is then incorporated into WCRT analysis. Three sets of applications with up to six concurrent tasks running are used to test our approach. The experimental results show that our approach can tighten the WCRT estimate by up to 32\% ($ 1.4 \times $) over prior state-of-the-art.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "7", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "real-time; worst-case response time", } @Article{Ratschan:2007:SVH, author = "Stefan Ratschan and Zhikun She", title = "Safety verification of hybrid systems by constraint propagation-based abstraction refinement", journal = j-TECS, volume = "6", number = "1", pages = "8:1--8:??", month = feb, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1210268.1210276", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:20:58 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This paper deals with the problem of safety verification of nonlinear hybrid systems. We start from a classical method that uses interval arithmetic to check whether trajectories can move over the boundaries in a rectangular grid. We put this method into an abstraction refinement framework and improve it by developing an additional refinement step that employs interval-constraint propagation to add information to the abstraction without introducing new grid elements. Moreover, the resulting method allows switching conditions, initial states, and unsafe states to be described by complex constraints, instead of sets that correspond to grid elements. Nevertheless, the method can be easily implemented, since it is based on a well-defined set of constraints, on which one can run any constraint propagation-based solver. Tests of such an implementation are promising.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "8", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "constraint propagation; hybrid systems; intervals", } @Article{Schepers:2007:GEI, author = "Henk Schepers", title = "Guest editorial: {Introduction} to the special issue on software and compilers for embedded systems", journal = j-TECS, volume = "6", number = "2", pages = "9:1--9:??", month = may, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1234675.1234676", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:21:17 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "9", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lee:2007:SCT, author = "Sheayun Lee and Jaejin Lee and Chang Yun Park and Sang Lyul Min", title = "Selective code transformation for dual instruction set processors", journal = j-TECS, volume = "6", number = "2", pages = "10:1--10:??", month = may, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1234675.1234677", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:21:17 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Embedded systems are often constrained in terms of both code size and execution time, because of a limited amount of available memory and real-time nature of applications. A dual instruction set processor, which supports a reduced instruction set (16 bits/instruction), in addition to a full instruction set (32 bits/instruction), allows an opportunity for a tradeoff between these two design criteria. 
Specifically, while the reduced instruction set can be used to reduce code size by providing smaller instructions, a program compiled into the reduced instruction set typically runs slower than the same program compiled into the full instruction set. Motivated by this observation, we propose a code generation technique that exploits this tradeoff relationship by selectively using the two instruction sets for different sections in the program. The proposed technique, called selective code transformation, not only provides a mechanism to enable a flexible tradeoff between a program's code size and its execution time, but also facilitates program optimization toward enhancing its worst case performance. The results from our experiments show that our proposed technique can be effectively used to fine-tune an application program on a spectrum of code size and execution performance, which, in turn, enables a system-wide optimization on memory space and execution speed involving multiple applications.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "10", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "dual instruction set processors; mixed-width instruction set architecture; reduced bit-width instruction set architecture", } @Article{Zhang:2007:RBP, author = "Wei Zhang and Bramha Allu", title = "Reducing branch predictor leakage energy by exploiting loops", journal = j-TECS, volume = "6", number = "2", pages = "11:1--11:??", month = may, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1234675.1234678", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:21:17 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "With the scaling of technology, leakage energy will become the dominant source of energy consumption.
Besides cache memories, branch predictors are among the largest on-chip array structures and consume nontrivial leakage energy. This paper proposes two cost-effective loop-based strategies to reduce the branch predictor leakage without impacting prediction accuracy or performance. The loop-based approaches exploit the fact that loops usually only contain a small number of instructions and, hence, even fewer branch instructions while taking a significant fraction of the execution time. Consequently, all the nonactive entries of branch predictors can be placed into the low leakage mode during the loop execution in order to reduce leakage energy. Compiler and circuit supports are discussed to implement the proposed leakage-reduction strategies. Compared to the recently proposed decay-based approach, our experimental results show that the loop-based approach can extract 16.2\% more dead time of the branch predictor, on average, leading to more leakage energy savings without impacting the branch prediction accuracy and performance.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "11", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "branch prediction; compiler; leakage energy", } @Article{Scharwaechter:2007:AAE, author = "Hanno Scharwaechter and David Kammler and Andreas Wieferink and Manuel Hohenauer and Kingshuk Karuri and Jianjiang Ceng and Rainer Leupers and Gerd Ascheid and Heinrich Meyr", title = "{ASIP} architecture exploration for efficient {IPSec} encryption: a case study", journal = j-TECS, volume = "6", number = "2", pages = "12:1--12:??", month = may, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1234675.1234679", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:21:17 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Application-Specific Instruction-Set Processors (ASIPs) are becoming increasingly popular in the world of customized, application-driven System-on-Chip (SoC) designs. Efficient ASIP design requires an iterative architecture exploration loop---gradual refinement of the processor architecture starting from an initial template. To accomplish this task, design automation tools are used to detect bottlenecks in embedded applications, to implement application-specific processor instructions, and to automatically generate the required software tools (such as instruction-set simulator, C-compiler, assembler, and profiler), as well as to synthesize the hardware. This paper describes an architecture exploration loop for an ASIP coprocessor that implements common encryption functionality used in symmetric block cipher algorithms for internet protocol security (IPSec). The coprocessor is accessed via shared memory and, as a consequence, our approach is easily adaptable to arbitrary main processor architectures. 
This paper presents the extended version of our case study that has been already published on the SCOPES conference in 2004. In both papers, a MIPS architecture is used as the main processor and Blowfish as encryption algorithm.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "12", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "ADL; ASIP; computer-aided design; IPSec", } @Article{Turjan:2007:CIC, author = "Alexandru Turjan and Bart Kienhuis and Ed Deprettere", title = "Classifying interprocess communication in process network representation of nested-loop programs", journal = j-TECS, volume = "6", number = "2", pages = "13:1--13:??", month = may, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1234675.1234680", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:21:17 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "New embedded signal-processing architectures are emerging that are composed of loosely coupled heterogeneous components like CPUs or DSPs, specialized IP cores, reconfigurable units, or memories. We believe that these architectures should be programmed using the process network model of computation. To ease the mapping of applications, we are developing the Compaan compiler that automatically derives a process network (PN) description from an application written in Matlab or C. In this paper, we investigate a particular problem in nested loop programs, which is about classifying the interprocess communication in the PN representation of the nested loop program. The global memory arrays present in the code have to be replaced by a distributed communication structure used for communicating data between the network processes. 
We show that four types of communication exist, each exhibiting different requirements when realizing them in hardware or software. We first present two compile time tests that are based on integer linear programming to decide the type of the communication. In the second part of this paper, we present alternative classification techniques that have polynomial complexity. However, in some cases, those techniques do not give a definitive answer and the ILP tests have to be applied. All present tests are combined in a hybrid classification scheme that correctly classifies the interprocess communication. In only 5\% of the cases to classify, we have to rely on integer linear programming while, in the remaining 95\%, the alternative techniques presented in this paper are able to correctly classify each case. The hybrid classification scheme has become an important part of our Compaan compiler.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "13", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "hybrid classification approach; integer linear programming; matrix manipulations; static analysis", } @Article{Ko:2007:BSA, author = "Ming-Yung Ko and Praveen K. Murthy and Shuvra S. Bhattacharyya", title = "Beyond single-appearance schedules: {Efficient DSP} software synthesis using nested procedure calls", journal = j-TECS, volume = "6", number = "2", pages = "14:1--14:??", month = may, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1234675.1234681", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:21:17 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Synthesis of digital signal-processing (DSP) software from dataflow-based formal models is an effective approach for tackling the complexity of modern DSP applications. 
In this paper, an efficient method is proposed for applying subroutine call instantiation of module functionality when synthesizing embedded software from a dataflow specification. The technique is based on a novel recursive decomposition of subgraphs in a cluster hierarchy that is optimized for low buffer size. Applying this technique, one can achieve significantly lower buffer sizes than what is available for minimum code size inlined schedules, which have been the emphasis of prior work on software synthesis. Furthermore, it is guaranteed that the number of procedure calls in the synthesized program is polynomially bounded in the size of the input dataflow graph, even though the number of module invocations may increase exponentially. This recursive decomposition approach provides an efficient means for integrating subroutine-based module instantiation into the design space of DSP software synthesis. The experimental results demonstrate a significant improvement in buffer cost, especially for more irregular multirate DSP applications, with moderate code and execution time overhead.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "14", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "block diagram compiler; design methodology; embedded systems; hierarchical graph decomposition; memory optimization; procedural implementation; synchronous dataflow", } @Article{Hua:2007:PDM, author = "Shaoxiong Hua and Gang Qu and Shuvra S. 
Bhattacharyya", title = "Probabilistic design of multimedia embedded systems", journal = j-TECS, volume = "6", number = "3", pages = "15:1--15:??", month = jul, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1275986.1275987", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:49:41 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this paper, we propose the novel concept of probabilistic design for multimedia embedded systems, which is motivated by the challenge of how to design, but not overdesign, such systems while systematically incorporating performance requirements of multimedia application, uncertainties in execution time, and tolerance for reasonable execution failures. Unlike most present techniques that are based on either worst- or average-case execution times of application tasks, where the former guarantees the completion of each execution, but often leads to overdesigned systems, and the latter fails to provide any completion guarantees, the proposed probabilistic design method takes advantage of unique features mentioned above of multimedia systems to relax the rigid hardware requirements for software implementation and avoid overdesigning the system. In essence, this relaxation expands the design space and we further develop an off-line on-line minimum effort algorithm for quick exploration of the enlarged design space at early design stages. This is the first step toward our goal of bridging the gap between real-time analysis and embedded software implementation for rapid and economic multimedia system design. It is our belief that the proposed method has great potential in reducing system resource while meeting performance requirements. 
The experimental results confirm this as we achieve significant saving in system's energy consumption to provide a statistical completion ratio guarantee (i.e., the expected number of completions over a large number of iterations is greater than a given value).", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "15", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "completion ratio; energy minimization; hardware/software codesign; multiple voltage; probabilistic design; soft real-time system", } @Article{Koushanfar:2007:TMC, author = "Farinaz Koushanfar and Abhijit Davare and David T. Nguyen and Alberto Sangiovanni-Vincentelli and Miodrag Potkonjak", title = "Techniques for maintaining connectivity in wireless ad-hoc networks under energy constraints", journal = j-TECS, volume = "6", number = "3", pages = "16:1--16:??", month = jul, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1275986.1275988", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:49:41 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Distributed wireless systems (DWSs) are emerging as the enabler for next-generation wireless applications. There is a consensus that DWS-based applications, such as pervasive computing, sensor networks, wireless information networks, and speech and data communication networks, will form the backbone of the next technological revolution. Simultaneously, with great economic, industrial, consumer, and scientific potential, DWSs pose numerous technical challenges. Among them, two are widely considered as crucial: autonomous localized operation and minimization of energy consumption. We address the fundamental problem of how to maximize the lifetime of the network using only local information, while preserving network connectivity. 
We start by introducing the care-free sleep (CS) Theorem that provides provably optimal conditions for a node to go into sleep mode while ensuring that global connectivity is not affected. The CS theorem is the basis for an efficient localized algorithm that decides which nodes will go into sleep mode and for how long. We have also developed mechanisms for collecting neighborhood information and for the coordination of distributed energy minimization protocols. The effectiveness of the approach is demonstrated using a comprehensive study of the performance of the algorithm over a wide range of network parameters. Another important highlight is the first mathematical and Monte Carlo analysis that establishes the importance of considering nodes within a small number of hops in order to preserve energy.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "16", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "ad-hoc networks; connectivity; energy management; low power; power management; sleeping coordination", } @Article{Wagner:2007:HSI, author = "Fl{\'a}vio R. Wagner and Wander Ces{\'a}rio and Ahmed A. Jerraya", title = "Hardware\slash software {IP} integration using the {ROSES} design environment", journal = j-TECS, volume = "6", number = "3", pages = "17:1--17:??", month = jul, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1275986.1275989", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:49:41 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Considering current time-to-market pressures, IP reuse is mandatory for the design of complex embedded systems-on-chip (SoC). The integration of IP components into a given design is the most complex task in the whole reuse process.
This paper describes the IP integration approach implemented in the ROSES design environment, which presents a unique combination of features that enhance IP reuse: automatic assembly of interfaces between heterogeneous software and hardware IP components; easy adaptation to different on-chip communication structures and bus and core standards; generation of customized and minimal OSs for programmable components; and an architecture-independent high-level API embedded into SystemC that makes application software independent from system implementation. Application code is written by using communication functions available in this API. ROSES automatically assembles wrappers that implement these functions, such that the application code does not need to be modified in order to run in the final synthesized system.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "17", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "IP integration; systems-on-chip", } @Article{Lee:2007:LBB, author = "Sang-Won Lee and Dong-Joo Park and Tae-Sun Chung and Dong-Ho Lee and Sangwon Park and Ha-Joo Song", title = "A log buffer-based flash translation layer using fully-associative sector translation", journal = j-TECS, volume = "6", number = "3", pages = "18:1--18:??", month = jul, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1275986.1275990", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:49:41 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Flash memory is being rapidly deployed as data storage for mobile devices such as PDAs, MP3 players, mobile phones, and digital cameras, mainly because of its low electronic power, nonvolatile storage, high performance, physical stability, and portability. 
One disadvantage of flash memory is that prewritten data cannot be dynamically overwritten. Before overwriting prewritten data, a time-consuming erase operation on the used blocks must precede, which significantly degrades the overall write performance of flash memory. In order to solve this ``erase-before-write'' problem, the flash memory controller can be integrated with a software module, called ``flash translation layer (FTL).'' Among many FTL schemes available, the log block buffer scheme is considered to be optimum. With this scheme, a small number of log blocks, a kind of write buffer, can improve the performance of write operations by reducing the number of erase operations. However, this scheme can suffer from low space utilization of log blocks. In this paper, we show that there is much room for performance improvement in the log buffer block scheme, and propose an enhanced log block buffer scheme, called FAST (full associative sector translation). Our FAST scheme improves the space utilization of log blocks using fully-associative sector translations for the log block sectors. We also show empirically that our FAST scheme outperforms the pure log block buffer scheme.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "18", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "address translation; associative mapping; flash memory; FTL; log blocks", } @Article{Wu:2007:EBT, author = "Chin-Hsien Wu and Tei-Wei Kuo and Li Ping Chang", title = "An efficient {B-tree} layer implementation for flash-memory storage systems", journal = j-TECS, volume = "6", number = "3", pages = "19:1--19:??", month = jul, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1275986.1275991", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:49:41 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "With the significant growth of the markets for consumer electronics and various embedded systems, flash memory is now an economic solution for storage systems design. Because index structures require intensively fine-grained updates/modifications, block-oriented access over flash memory could introduce a significant number of redundant writes. This might not only severely degrade the overall performance, but also damage the reliability of flash memory. In this paper, we propose a very different approach, which can efficiently handle fine-grained updates/modifications caused by B-tree index access over flash memory. The implementation is done directly over the flash translation layer (FTL); hence, no modifications to existing application systems are needed. We demonstrate that when index structures are adopted over flash memory, the proposed methodology can significantly improve the system performance and, at the same time, reduce both the overhead of flash-memory management and the energy dissipation. The average response time of record insertions and deletions was also significantly reduced.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "19", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "B-tree; database systems; embedded systems; flash memory; storage systems", } @Article{Xie:2007:ISP, author = "Tao Xie and Xiao Qin", title = "Improving security for periodic tasks in embedded systems through scheduling", journal = j-TECS, volume = "6", number = "3", pages = "20:1--20:??", month = jul, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1275986.1275992", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:49:41 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "While many scheduling algorithms for periodic tasks ignore security requirements posed by sensitive applications and are, consequently, unable to perform properly in embedded systems with security constraints, in this paper, we present an approach to scheduling periodic tasks in embedded systems subject to security and timing constraints. We design a necessary and sufficient feasibility check for a set of periodic tasks with security requirements. With the feasibility test in place, we propose a scheduling algorithm, or SASES (security-aware scheduling for embedded systems), which accounts for both security and timing requirements. SASES judiciously distributes slack times among a variety of security services for a set of periodic tasks, thereby optimizing security for embedded systems without sacrificing schedulability. To demonstrate the effectiveness of SASES, we apply the proposed SASES to real-world embedded systems such as an automated flight control system. We show, through extensive simulations, that SASES is able to maximize security for embedded systems while guaranteeing timeliness. 
In particular, SASES significantly improves security over three baseline algorithms by up to 107\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "20", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "embedded systems; periodic tasks; real-time systems; scheduling; security-sensitive applications", } @Article{Gupta:2007:ISL, author = "Rajiv Gupta and Yunheung Paek", title = "Introduction to the special {LCTES'05} issue", journal = j-TECS, volume = "6", number = "4", pages = "21:1--21:??", month = sep, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1274858.1274859", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:21:30 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "21", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Gay:2007:SDP, author = "David Gay and Philip Levis and David Culler", title = "Software design patterns for {TinyOS}", journal = j-TECS, volume = "6", number = "4", pages = "22:1--22:??", month = sep, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1274858.1274860", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:21:30 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We present design patterns used by software components in the TinyOS sensor network operating system. They differ significantly from traditional software design patterns because of the constraints of sensor networks and to TinyOS's focus on static allocation and whole-program composition. 
We describe how nesC has evolved to support these design patterns by including a few simple language primitives and optimizations.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "22", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "design patterns; embedded systems; nesC; TinyOS", } @Article{Chanet:2007:ARM, author = "Dominique Chanet and Bjorn {De Sutter} and Bruno {De Bus} and Ludo {Van Put} and Koen {De Bosschere}", title = "Automated reduction of the memory footprint of the {Linux} kernel", journal = j-TECS, volume = "6", number = "4", pages = "23:1--23:??", month = sep, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1274858.1274861", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:21:30 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The limited built-in configurability of Linux can lead to expensive code size overhead when it is used in the embedded market. To overcome this problem, we propose the application of link-time compaction and specialization techniques that exploit the a priori known, fixed runtime environment of many embedded systems. In experimental setups based on the ARM XScale and i386 platforms, the proposed techniques are able to reduce the kernel memory footprint with over 16\%. We also show how relatively simple additions to existing binary rewriters can implement the proposed techniques for a complex, very unconventional program, such as the Linux kernel. We note that even after specialization, a lot of seemingly unnecessary code remains in the kernel and propose to reduce the footprint of this code by applying code-compression techniques. This technique, combined with the previous ones, reduces the memory footprint with over 23\% for the i386 platform and 28\% for the ARM platform. 
Finally, we pinpoint an important code size growth problem when compaction and compression techniques are combined on the ARM platform.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "23", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "compaction; compression; Linux kernel; operating system; specialization; system calls", } @Article{Sassone:2007:SSS, author = "Peter G. Sassone and D. Scott Wills and Gabriel H. Loh", title = "Static strands: {Safely} exposing dependence chains for increasing embedded power efficiency", journal = j-TECS, volume = "6", number = "4", pages = "24:1--24:??", month = sep, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1274858.1274862", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:21:30 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Modern embedded processors are designed to maximize execution efficiency---the amount of performance achieved per unit of energy dissipated while meeting minimum performance levels. To increase this efficiency, we propose utilizing static strands, dependence chains without fan-out, which are exposed by a compiler pass. These dependent instructions are resequenced to be sequential and annotated to communicate their location to the hardware. Importantly, this modified application is binary compatible and functionally identical to the original, allowing transparent execution on a baseline processor. However, these static strands can be easily collapsed and optimized by simple processor modifications, significantly reducing the workload energy. Results show that over 30\% of MediaBench and Spec2000int dynamic instructions can be collapsed, reducing issue logic energy by 20\%, bypass energy 19\%, and register file energy 14\%. 
In addition, by increasing the effective capacity of pipeline resources by almost a third, average IPC can be improved up to 15\%. This performance gain can then be traded in for a lower clock frequency to maintain a baseline level of performance, further reducing energy.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "24", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "architecture; dependency collapsing; energy; sequentiality", } @Article{Staschulat:2007:SPC, author = "Jan Staschulat and Rolf Ernst", title = "Scalable precision cache analysis for real-time software", journal = j-TECS, volume = "6", number = "4", pages = "25:1--25:??", month = sep, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1274858.1274863", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:21:30 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Caches are needed to increase the processor performance, but the temporal behavior is difficult to predict, especially in embedded systems with preemptive scheduling. Current approaches use simplified assumptions or propose complex analysis algorithms to bound the cache-related preemption delay. In this paper, a scalable preemption delay analysis for associative instruction caches to control the analysis precision and the time-complexity is proposed. An accurate preemption delay calculation is integrated into a cache-aware schedulability analysis. The framework is evaluated in several experiments.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "25", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "cache; embedded systems; preemptive scheduling; worst-case execution time analysis", } @Article{Varma:2007:AFS, author = "Ankush Varma and Bruce Jacob and Eric Debes and Igor Kozintsev and Paul Klein", title = "Accurate and fast system-level power modeling: an {XScale}-based case study", journal = j-TECS, volume = "6", number = "4", pages = "26:1--26:??", month = sep, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1274858.1274864", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:21:30 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Accurate and fast system modeling is central to the rapid design space exploration needed for embedded-system design. With fast, complex SoCs playing a central role in such systems, system designers have come to require MIPS-range simulation speeds and near-cycle accuracy. The sophisticated simulation frameworks that have been developed for high-speed system performance modeling do not address power consumption, although it is a key design constraint. In this paper, we define a simulation-based methodology for extending system performance-modeling frameworks to also include power modeling. We demonstrate the use of this methodology with a case study of a real, complex embedded system, comprising the Intel XScale{\reg} embedded microprocessor, its WMMX{\trademark} SIMD coprocessor, L1 caches, SDRAM and the on-board address and data buses. We describe detailed power models for each of these components and validate them against physical measurements from hardware, demonstrating that such frameworks enable designers to model both power and performance at high speeds without sacrificing accuracy.
Our results indicate that the power estimates obtained are accurate within 5\% of physical measurements from hardware, while simulation speeds consistently exceed a million instructions per second (MIPS).", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "26", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "embedded systems; power modeling; SystemC", } @Article{Carta:2007:CTA, author = "Salvatore Carta and Andrea Alimonda and Alessandro Pisano and Andrea Acquaviva and Luca Benini", title = "A control theoretic approach to energy-efficient pipelined computation in {MPSoCs}", journal = j-TECS, volume = "6", number = "4", pages = "27:1--27:??", month = sep, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1274858.1274865", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:21:30 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this work, we describe a control theoretic approach to dynamic voltage/frequency scaling (DVFS) in a pipelined MPSoC architecture with soft real-time constraints, aimed at minimizing energy consumption with throughput guarantees. Theoretical analysis and experiments carried out on a cycle-accurate, energy-aware, and multiprocessor simulation platform are provided. We give a dynamic model of the system behavior which allows to synthesize linear and nonlinear feedback control schemes for the run-time adjustment of the core frequencies. We study the characteristics of the proposed techniques in both transient and steady-state conditions. Finally, we compare the proposed feedback approaches and local DVFS policies from an energy consumption viewpoint.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "27", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "DVFS; feedback-control techniques; MPSoC; parallel systems", } @Article{Crenshaw:2007:RIE, author = "Tanya L. Crenshaw and Spencer Hoke and Ajay Tirumala and Marco Caccamo", title = "Robust implicit {EDF}: a wireless {MAC} protocol for collaborative real-time systems", journal = j-TECS, volume = "6", number = "4", pages = "28:1--28:??", month = sep, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1274858.1274866", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:21:30 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Advances in wireless technology have brought us closer to extensive deployment of distributed real-time embedded systems connected through a wireless channel. The medium-access control (MAC) layer protocol is critical in providing a real-time guarantee. We have devised a real-time wireless MAC protocol, robust implicit earliest deadline first, or RI-EDF. Packets are transmitted according to EDF scheduling rules, offering a protocol that implicitly avoids contention. In the event of a packet loss or a node failure, every node has the opportunity to recover the schedule based on a static recovery priority, offering a protocol that is robust with no central point of failure. We demonstrate in simulations that RI-EDF provides better goodput and lower packet loss than existing protocols like 802.11 PCF and EDCF. In our implementation and distributed control test-bed, we show that RI-EDF provides better throughput than the TinyOS MAC-layer protocol. Overall, RI-EDF provides predictable temporal behavior with minimal impact on node failures, packet losses, and noise in the channel.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "28", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "earliest deadline first; medium-access control; real time; wireless", } @Article{Quan:2007:EED, author = "Gang Quan and Xiaobo Sharon Hu", title = "Energy efficient {DVS} schedule for fixed-priority real-time systems", journal = j-TECS, volume = "6", number = "4", pages = "29:1--29:??", month = sep, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1274858.1274867", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:21:30 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Energy consumption has become an increasingly important consideration in designing many real-time embedded systems. Variable voltage processors, if used properly, can dramatically reduce such system energy consumption. In this paper, we present a technique to determine voltage settings for a variable voltage processor that utilizes a fixed-priority assignment to schedule jobs. By exploiting more efficiently the processor slack time, our approach can be more effective in reducing the execution speed for real-time tasks when necessary. Our approach also produces the minimum constant voltage needed to feasibly schedule the entire job set. With both randomly generated and practical examples, our heuristic approach can achieve the dynamic energy reduction very close to the theoretically optimal one (within 2\%) with much less computation cost.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "29", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "dynamic voltage scaling; fixed-priority scheduling; low power; real time", } @Article{Rao:2007:EOS, author = "Ravishankar Rao and Sarma Vrudhula", title = "Energy optimal speed control of a producer--consumer device pair", journal = j-TECS, volume = "6", number = "4", pages = "30:1--30:??", month = sep, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1274858.1274868", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:21:30 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We propose a modular approach for minimizing the total energy consumed by a pair of generic communicating devices (producer--consumer scenario) by jointly controlling their speed profiles. Each device (like a CPU, or disk drive) is assumed to have a controllable variable called its speed (e.g., a CPU's clock frequency, a disk drive's spindle motor speed) that affects its power consumption and performance (e.g., throughput, data transfer rate). The device and task models we analyzed were inspired by applications like CD recording (hard drive to CD drive data transfer) and data processing (disk drive to CPU data transfer). The proposed solution can be used for any pair of devices with convex (for continuous speed sets) or W-convex (a discrete version of a convex function for discrete speed sets) power--speed relationships. For discrete speed sets, the method operates directly on the power--speed values and does not require an analytical relationship between power and speed. The key to solving the two-device optimization problem was the observation that it could be split into two single device parametric optimization problems, where the parameters correspond to the common task that both the devices must execute. 
The following divide-and-conquer approach is proposed: [divide] the optimal speed policy and energy consumption of each device is derived as an analytical function of its task parameters; [conquer] the optimal values of these parameters are found by minimizing the sum of the parameterized energy functions and plugged back into the parameterized speed profiles. The main advantage of this approach is that each device can be characterized independently and this allows system designers to mix and match manufacturer-supplied device energy curves to evaluate and optimize different application scenarios. We demonstrate our approach using three device characterization examples (for a CD drive, hard drive, and a CPU) and two application scenarios (CD recording, MD5 checksum computation).", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "30", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "disk drive; energy optimization; joint optimization; processor; speed control", } @Article{Loghi:2007:PMM, author = "Mirko Loghi and Luca Benini and Massimo Poncino", title = "Power macromodeling of {MPSoC} message passing primitives", journal = j-TECS, volume = "6", number = "4", pages = "31:1--31:??", month = sep, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1274858.1274869", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:21:30 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Estimating the energy consumption of software in multiprocessor systems-on-chip (MPSoCs) is crucial for enabling quick evaluations of both software and hardware optimizations. However, high-level estimations should be applicable at software level, possibly constructing effective power models depending on parameters that can be extracted directly from the application characteristics. 
We propose a methodology for accurate analysis of power consumption of message-passing primitives in a MPSoC, and, in particular, an energy model which, in spite of its simplicity, allows to model the traffic-dependent nature of energy consumption through the use of a single, abstract parameter, namely, the size of the message exchanged.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "31", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "communication primitives; macromodeling; multiprocessor; system-on-chip", } @Article{Kansal:2007:PME, author = "Aman Kansal and Jason Hsu and Sadaf Zahedi and Mani B. Srivastava", title = "Power management in energy harvesting sensor networks", journal = j-TECS, volume = "6", number = "4", pages = "32:1--32:??", month = sep, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1274858.1274870", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:21:30 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Power management is an important concern in sensor networks, because a tethered energy infrastructure is usually not available and an obvious concern is to use the available battery energy efficiently. However, in some of the sensor networking applications, an additional facility is available to ameliorate the energy problem: harvesting energy from the environment. Certain considerations in using an energy harvesting source are fundamentally different from that in using a battery, because, rather than a limit on the maximum energy, it has a limit on the maximum rate at which the energy can be used. Further, the harvested energy availability typically varies with time in a nondeterministic manner. 
While a deterministic metric, such as residual battery, suffices to characterize the energy availability in the case of batteries, a more sophisticated characterization may be required for a harvesting source. Another issue that becomes important in networked systems with multiple harvesting nodes is that different nodes may have different harvesting opportunity. In a distributed application, the same end-user performance may be achieved using different workload allocations, and resultant energy consumptions at multiple nodes. In this case, it is important to align the workload allocation with the energy availability at the harvesting nodes. We consider the above issues in power management for energy-harvesting sensor networks. We develop abstractions to characterize the complex time varying nature of such sources with analytically tractable models and use them to address key design issues. We also develop distributed methods to efficiently use harvested energy and test these both in simulation and experimentally on an energy-harvesting sensor network, prototyped for this work.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "32", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "adaptive duty cycling; energy neutrality; Heliomote; lifetime; power management", } @Article{Bueno:2007:RRP, author = "David Bueno and Chris Conger and Alan D. 
George and Ian Troxel and Adam Leko", title = "{RapidIO} for radar processing in advanced space systems", journal = j-TECS, volume = "7", number = "1", pages = "1:1--1:38", month = dec, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1324969.1324970", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:21:48 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Space-based radar is a suite of applications that presents many unique system design challenges. In this paper, we investigate use of RapidIO, a new high-performance embedded systems interconnect, in addressing issues associated with the high network bandwidth requirements of real-time ground moving target indicator (GMTI), and synthetic aperture radar (SAR) applications in satellite systems. Using validated simulation, we study several critical issues related to the RapidIO network and algorithms under study. The results show that RapidIO is a promising platform for space-based radar using emerging technology, providing network bandwidth to enable parallel computation previously unattainable in an embedded satellite system.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "ground-moving target indicator; RapidIO; space-based radar; synthetic aperture radar", } @Article{Fei:2007:EOS, author = "Yunsi Fei and Srivaths Ravi and Anand Raghunathan and Niraj K. 
Jha", title = "Energy-optimizing source code transformations for operating system-driven embedded software", journal = j-TECS, volume = "7", number = "1", pages = "2:1--2:26", month = dec, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1324969.1324971", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:21:48 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This paper proposes four types of source code transformations for operating system (OS)-driven embedded software programs to reduce their energy consumption. Their key features include spanning of process boundaries and minimization of the energy consumed in the execution of OS services---opportunities which are beyond the reach of conventional compiler optimizations and source code transformations. We have applied the proposed transformations to several multiprocess benchmark programs in the context of an embedded Linux OS running on an Intel StrongARM processor. They achieve up to 37.9\% (23.8\%, on average) energy reduction compared to highly compiler-optimized implementations.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "energy consumption; Linux; source code transformations", } @Article{Zhu:2007:ESA, author = "Yifan Zhu and Frank Mueller", title = "Exploiting synchronous and asynchronous {DVS} for feedback {EDF} scheduling on an embedded platform", journal = j-TECS, volume = "7", number = "1", pages = "3:1--3:26", month = dec, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1324969.1324972", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:21:48 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Contemporary processors support dynamic voltage scaling (DVS) to reduce power consumption by varying processor voltage/frequency dynamically. We develop power-aware feedback--DVS algorithms for hard real-time systems that adapt to dynamically changing workloads. The algorithms lower execution speed while guaranteeing timing constraints. We study energy consumption for synchronous and asynchronous DVS switching on a PowerPC board. Energy, measured via data acquisition, is reduced up to 70\% over na{\"\i}ve DVS for our feedback scheme with 24\% peak savings over previous algorithms. These results, albeit differing in quantity, confirm trends observed under simulation. They are the first of their kind on an embedded board.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "dynamic voltage scaling; feedback control; real-time systems; scheduling", } @Article{Vera:2007:DCL, author = "Xavier Vera and Bj{\"o}rn Lisper and Jingling Xue", title = "Data cache locking for tight timing calculations", journal = j-TECS, volume = "7", number = "1", pages = "4:1--4:38", month = dec, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1324969.1324973", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:21:48 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Caches have become increasingly important with the widening gap between main memory and processor speeds. Small and fast cache memories are designed to bridge this discrepancy. However, they are only effective when programs exhibit sufficient data locality. In addition, caches are a source of unpredictability, resulting in programs sometimes behaving in a different way than expected. Detailed information about the number of cache misses and their causes allows us to predict cache behavior and to detect bottlenecks. Small modifications in the source code may change memory patterns, thereby altering the cache behavior. Code transformations, which take the cache behavior into account, might result in a high cache performance improvement. However, cache memory behavior is very hard to predict, thus making the task of optimizing and timing cache behavior very difficult. This article proposes and evaluates a new compiler framework that times cache behavior for multitasking systems. Our method explores the use of cache partitioning and dynamic cache locking to provide worst-case performance estimates in a safe and tight way for multitasking systems. We use cache partitioning, which divides the cache among tasks to eliminate intertask cache interferences. 
We combine static cache analysis and cache-locking mechanisms to ensure that all intratask conflicts, and consequently, memory access times, are exactly predictable. The results of our experiments demonstrate the capability of our framework to describe cache behavior at compile time. We compare our timing approach with a system equipped with a nonpartitioned, but statically, locked data cache. Our method outperforms static cache locking for all analyzed task sets under various cache architectures, demonstrating that our fully predictable scheme does not compromise the performance of the transformed programs.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "data cache analysis; embedded systems; safety critical systems; worst-case execution time", } @Article{Armbruster:2007:RTJ, author = "Austin Armbruster and Jason Baker and Antonio Cunei and Chapman Flack and David Holmes and Filip Pizlo and Edward Pla and Marek Prochazka and Jan Vitek", title = "A real-time {Java} virtual machine with applications in avionics", journal = j-TECS, volume = "7", number = "1", pages = "5:1--5:49", month = dec, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1324969.1324974", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:21:48 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This paper reports on our experience with the implementation of the Real-time Specification for Java on the Ovm open source Java virtual machine. We describe the architecture and main design decisions involved in implementing real-time Java on Ovm. We present the first use of Real-time Java in avionics in the context of control software for a ScanEagle Unmanned Aerial Vehicle.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "avionics; memory management; Real-Time Java; virtual machines", } @Article{Mangeruca:2007:USU, author = "Leonardo Mangeruca and Massimo Baleani and Alberto Ferrari and Alberto Sangiovanni-Vincentelli", title = "Uniprocessor scheduling under precedence constraints for embedded systems design", journal = j-TECS, volume = "7", number = "1", pages = "6:1--6:30", month = dec, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1324969.1324975", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:21:48 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this paper, we present a novel approach to the constrained scheduling problem, while addressing a more general class of constraints that arise from the timing requirements on real-time embedded controllers. We provide general necessary and sufficient conditions for scheduling under precedence constraints and derive sufficient conditions for two well-known scheduling policies. We define mathematical problems that provide optimum priority and deadline assignments, while ensuring both precedence constraints and system's schedulability. We show how these problems can be relaxed to corresponding integer linear programming (ILP) formulations leveraging on available solvers. The results are demonstrated on a real design case.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "design of embedded systems; embedded software; precedence constraints; real-time scheduling", } @Article{Bordoloi:2007:ISA, author = "Unmesh D. 
Bordoloi and Samarjit Chakraborty", title = "Interactive schedulability analysis", journal = j-TECS, volume = "7", number = "1", pages = "7:1--7:27", month = dec, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1324969.1324976", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:21:48 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "A typical design process for real-time embedded systems involves choosing the values of certain system parameters and performing a schedulability analysis to determine whether all deadline constraints can be satisfied. If such an analysis returns a negative answer, then some of the parameters are modified and the analysis is invoked once again. This iteration is repeated until a schedulable design is obtained. However, the schedulability analysis problem for most task models is intractable (usually co-NP hard) and, hence, such an iterative design process is often very expensive. To get around this problem, we introduce the concept of ``interactive'' schedulability analysis. It is based on the observation that if only a small number of system parameters are changed, then it is not necessary to rerun the full schedulability analysis algorithm, thereby making the iterative design process considerably faster. We refer to this analysis as being ``interactive'' because it is supposed to be run in an interactive mode. This concept is fairly general and can be applied to a wide variety of task models. In this paper, we have chosen the recurring real-time task model, because it can be used to represent realistic applications from the embedded systems domain (containing conditional branches and fine-grained deadline constraints). 
Our experimental results show that using our scheme can lead to more than 20$\times$ speedup for each invocation of the schedulability analysis algorithm, compared to the case where the full algorithm is run.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "interactive design; nonfunctional constraints; performance debugging; recurring real-time task model; schedulability analysis", } @Article{Ha:2008:IES, author = "Soonhoi Ha and Kiyoung Choi and Taewhan Kim and Krisztian Flautner and Sanglyul Min and Wang Yi", title = "Introduction to embedded systems week 2006 special issue", journal = j-TECS, volume = "7", number = "2", pages = "8:1--8:??", month = feb, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1331331.1331332", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:22:00 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "8", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kim:2008:EAC, author = "Minyoung Kim and Sudarshan Banerjee and Nikil Dutt and Nalini Venkatasubramanian", title = "Energy-aware cosynthesis of real-time multimedia applications on {MPSoCs} using heterogeneous scheduling policies", journal = j-TECS, volume = "7", number = "2", pages = "9:1--9:??", month = feb, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1331331.1331333", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:22:00 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Real-time multimedia applications are increasingly being mapped onto MPSoC (multiprocessor system-on-chip) platforms containing hardware--software IPs (intellectual property), along with a library of common scheduling policies such as EDF, RM. The choice of a scheduling policy for each IP is a key decision that greatly affects the design's ability to meet real-time constraints, and also directly affects the energy consumed by the design. We present a cosynthesis framework for design space exploration that considers heterogeneous scheduling while mapping multimedia applications onto such MPSoCs. In our approach, we select a suitable scheduling policy for each IP such that system energy is minimized---our framework also includes energy-reduction techniques utilizing dynamic power management. Experimental results on a realistic multimode multimedia terminal application demonstrate that our approach enables us to select design points with up to 60.5\% reduced energy for a given area constraint, while meeting all real-time requirements. 
More importantly, our approach generates a tradeoff space between energy and cost allowing designers to comparatively evaluate multiple system level mappings.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "9", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "cosynthesis; energy; MPSoC; real-time scheduling", } @Article{Raman:2008:ASW, author = "Balaji Raman and Samarjit Chakraborty", title = "Application-specific workload shaping in multimedia-enabled personal mobile devices", journal = j-TECS, volume = "7", number = "2", pages = "10:1--10:??", month = feb, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1331331.1331334", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:22:00 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Today, most personal mobile devices (e.g., cell phones and PDAs) are multimedia-enabled and support a variety of concurrently running applications, such as audio/video players, word processors, and web browsers. Media-processing applications are often computationally expensive and most of these devices typically have 100--400-MHz processors. As a result, the user-perceived application response times are often poor when multiple applications are concurrently fired. In this paper, we show that by using application-specific dynamic buffering techniques, the workload of these applications can be suitably ``shaped'' to fit the available processor bandwidth. Our techniques are analogous to traffic shaping, which is widely used in communication networks to optimally utilize network bandwidth. Such shaping techniques have recently attracted a lot of attention in the context of embedded systems design (e.g., for dynamic voltage scaling). 
However, they have not been exploited for enhanced schedulability of multiple applications, as we do in this paper.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "10", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "mobile devices; multimedia systems; schedulability analysis", } @Article{Egger:2008:DSM, author = "Bernhard Egger and Jaejin Lee and Heonshik Shin", title = "Dynamic scratchpad memory management for code in portable systems with an {MMU}", journal = j-TECS, volume = "7", number = "2", pages = "11:1--11:??", month = feb, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1331331.1331335", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:22:00 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this work, we present a dynamic memory allocation technique for a novel, horizontally partitioned memory subsystem targeting contemporary embedded processors with a memory management unit (MMU). We propose to replace the on-chip instruction cache with a scratchpad memory (SPM) and a small minicache. Serializing the address translation with the actual memory access enables the memory system to access either only the SPM or the minicache. Independent of the SPM size and based solely on profiling information, a postpass optimizer classifies the code of an application binary into a pageable and a cacheable code region. The latter is placed at a fixed location in the external memory and cached by the minicache. The former, the pageable code region, is copied on demand to the SPM before execution. Both the pageable code region and the SPM are logically divided into pages the size of an MMU memory page. 
Using the MMU's pagefault exception mechanism, a runtime scratchpad memory manager (SPMM) tracks page accesses and copies frequently executed code pages to the SPM before they get executed. In order to minimize the number of page transfers from the external memory to the SPM, good code placement techniques become more important with increasing sizes of the MMU pages. We discuss code-grouping techniques and provide an analysis of the effect of the MMU's page size on execution time, energy consumption, and external memory accesses. We show that by using the data cache as a victim buffer for the SPM, significant energy savings are possible. We evaluate our SPM allocation strategy with fifteen applications, including H.264, MP3, MPEG-4, and PGP. The proposed memory system requires 8\% less die area compared to a fully-cached configuration. On average, we achieve a 31\% improvement in runtime performance and a 35\% reduction in energy consumption with an MMU page size of 256 bytes.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "11", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "code placement; compilers; heterogeneous memory; paging; portable systems; postpass optimization; scratchpad; victim cache; virtual memory", } @Article{Scholz:2008:MPB, author = "Bernhard Scholz and Bernd Burgstaller and Jingling Xue", title = "Minimal placement of bank selection instructions for partitioned memory architectures", journal = j-TECS, volume = "7", number = "2", pages = "12:1--12:??", month = feb, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1331331.1331336", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:22:00 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We have devised an algorithm for minimal placement of bank selections in partitioned memory architectures. This algorithm is parameterizable for a chosen metric, such as speed, space, or energy. Bank switching is a technique that increases the code and data memory in microcontrollers without extending the address buses. Given a program in which variables have been assigned to data banks, we present a novel optimization technique that minimizes the overhead of bank switching through cost-effective placement of bank selection instructions. The placement is controlled by a number of different objectives, such as runtime, low power, small code size or a combination of these parameters. We have formulated the minimal placement of bank selection instructions as a discrete optimization problem that is mapped to a partitioned boolean quadratic programming (PBQP) problem. We implemented the optimization as part of a PIC Microchip backend and evaluated the approach for several optimization objectives. 
Our benchmark suite comprises programs from MiBench and DSPStone plus a microcontroller real-time kernel and drivers for microcontroller hardware devices. Our optimization achieved a reduction in program memory space of between 2.7 and 18.2\%, and an overall improvement with respect to instruction cycles between 5.0 and 28.8\%. Our optimization achieved the minimal solution for all benchmark programs. We investigated the scalability of our approach toward the requirements of future generations of microcontrollers. This study was conducted as a worst-case analysis on the entire MiBench suite. Our results show that our optimization (1) scales well to larger numbers of memory banks, (2) scales well to the larger problem sizes that will become feasible with future microcontrollers, and (3) achieves minimal placement for more than 72\% of all functions from MiBench.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "12", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "bank selection; partitioned Boolean quadratic programming; partitioned memory architectures", } @Article{Choi:2008:SHM, author = "Yoonseo Choi and Hwansoo Han", title = "Shared heap management for memory-limited {Java} virtual machines", journal = j-TECS, volume = "7", number = "2", pages = "13:1--13:??", month = feb, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1331331.1331337", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:22:00 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "One scarce resource in embedded systems is memory. Multitasking makes the lack of memory problem even worse. Most current embedded systems, which do not provide virtual memory, simply divide physical memory and evenly assign contiguous memory chunks to multiple applications. 
Such simple memory management can frequently cause the lack of available memory for some applications, while others are not using the full amount of assigned memory. To overcome inefficiency in current memory management, we present an efficient heap management scheme that allows multiple applications to share heap space. To reduce overall heap memory usage, applications adaptively acquire subheaps out of shared pool of memory and release surplus subheaps to shared pool. As a result, applications see noncontiguous multiple subheaps as a heap in their address space. We target Java applications to implement our heap-sharing scheme in the KVM from Sun Microsystems. To protect fragmented heap space with a limited number of regions in memory protection unit (MPU), we maintain only a limited number of subheaps. We experimentally evaluate our heap management scheme with J2ME MIDP applications. Our static and dynamic schemes reduce heap memory usage, on average, by 30 and 27\%, respectively. For both schemes, overheads are kept low. The execution times in our schemes are increased only by 0.01\% for static scheme and 0.35\% for dynamic scheme, on average.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "13", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "dynamic memory management; garbage collection; heap sharing; memory protection unit", } @Article{So:2008:UHS, author = "Hayden Kwok-Hay So and Robert Brodersen", title = "A unified hardware\slash software runtime environment for {FPGA}-based reconfigurable computers using {BORPH}", journal = j-TECS, volume = "7", number = "2", pages = "14:1--14:??", month = feb, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1331331.1331338", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:22:00 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This paper explores the design and implementation of BORPH, an operating system designed for FPGA-based reconfigurable computers. Hardware designs execute as normal UNIX processes under BORPH, having access to standard OS services, such as file system support. Hardware and software components of user designs may, therefore, run as communicating processes within BORPH's runtime environment. The familiar language independent UNIX kernel interface facilitates easy design reuse and rapid application development. To develop hardware designs, a Simulink-based design flow that integrates with BORPH is employed. Performances of BORPH on two on-chip systems implemented on a BEE2 platform are compared.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "14", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "BORPH; FPGA; hardware process; reconfigurable computers", } @Article{Caspi:2008:SPM, author = "Paul Caspi and Norman Scaife and Christos Sofronis and Stavros Tripakis", title = "Semantics-preserving multitask implementation of synchronous programs", journal = j-TECS, volume = "7", number = "2", pages = "15:1--15:??", month = feb, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1331331.1331339", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:22:00 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We study the implementation of a synchronous program as a set of multiple tasks running on the same computer, and scheduled by a real-time operating system using some preemptive scheduling policy, such as fixed priority or earliest-deadline first. Multitask implementations are necessary, for instance, in multiperiodic applications, when the worst-case execution time of the program is larger than its smallest period. In this case, a single-task implementation violates the schedulability assumption and, therefore, the synchrony hypothesis does not hold. We are aiming at semantics-preserving implementations, where, for a given input sequence, the output sequence produced by the implementation is the same as that produced by the original synchronous program, and this under all possible executions of the implementation. Straightforward implementation techniques are not semantics-preserving. We present an intertask communication protocol, called DBP, that is semantics-preserving and memory-optimal. DBP guarantees semantical preservation under all possible triggering patterns of the synchronous program: thus, it is applicable not only to time-, but also event-triggered applications. 
DBP works under both fixed priority and earliest-deadline first scheduling. DBP is a nonblocking protocol based on the use of intermediate buffers and manipulations of write-to/read-from pointers to these buffers: these manipulations happen upon arrivals, rather than executions of tasks, which is a distinguishing feature of DBP. DBP is memory-optimal in the sense that it uses as few buffers as needed, for any given triggering pattern. In the worst case, DBP requires, at most, $ N + 2 $ buffers for each writer, where $N$ is the number of readers for this writer.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "15", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "embedded software; model-based design; optimality; preemptive scheduling; process communication; semantical preservation; synchronous programming", } @Article{Liu:2008:HPP, author = "Duo Liu and Zheng Chen and Bei Hua and Nenghai Yu and Xinan Tang", title = "High-performance packet classification algorithm for multithreaded {IXP} network processor", journal = j-TECS, volume = "7", number = "2", pages = "16:1--16:??", month = feb, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1331331.1331340", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:22:00 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Packet classification is crucial for the Internet to provide more value-added services and guaranteed quality of service. Besides hardware-based solutions, many software-based classification algorithms have been proposed. However, classifying at 10 Gbps speed or higher is a challenging problem and it is still one of the performance bottlenecks in core routers. 
In general, classification algorithms face the same challenge of balancing between high classification speed and low memory requirements. This paper proposes a modified recursive flow classification (RFC) algorithm, Bitmap-RFC, which significantly reduces the memory requirements of RFC by applying a bitmap compression technique. To speed up classifying speed, we exploit the multithreaded architectural features in various algorithm development stages from algorithm design to algorithm implementation. As a result, Bitmap-RFC strikes a good balance between speed and space. It can significantly keep both high classification speed and reduce memory space consumption. This paper investigates the main NPU software design aspects that have dramatic performance impacts on any NPU-based implementations: memory space reduction, instruction selection, data allocation, task partitioning, and latency hiding. We experiment with an architecture-aware design principle to guarantee the high performance of the classification algorithm on an NPU implementation. The experimental results show that the Bitmap-RFC algorithm achieves 10 Gbps speed or higher and has a good scalability on Intel IXP2800 NPU.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "16", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "architecture; embedded system design; multithreading; network processor; packet classification; thread-level parallelism", } @Article{Zhuo:2008:EED, author = "Jianli Zhuo and Chaitali Chakrabarti", title = "Energy-efficient dynamic task scheduling algorithms for {DVS} systems", journal = j-TECS, volume = "7", number = "2", pages = "17:1--17:??", month = feb, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1331331.1331341", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:22:00 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Dynamic voltage scaling (DVS) is a well-known low-power design technique that reduces the processor energy by slowing down the DVS processor and stretching the task execution time. However, in a DVS system consisting of a DVS processor and multiple devices, slowing down the processor increases the device energy consumption and thereby the system-level energy consumption. In this paper, we first use system-level energy consideration to derive the ``optimal'' scaling factor by which a task should be scaled if there are no deadline constraints. Next, we develop dynamic task-scheduling algorithms that make use of dynamic processor utilization and optimal scaling factor to determine the speed setting of a task. We present algorithm duEDF, which reduces the CPU energy consumption and algorithm duSYS and its reduced preemption version, duSYS\_PC, which reduce the system-level energy. Experimental results on the video-phone task set show that when the CPU power is dominant, algorithm duEDF results in up to 45\% energy savings compared to the non-DVS case. 
When the CPU power and device power are comparable, algorithms duSYS and duSYS\_PC achieve up to 25\% energy saving compared to CPU energy-efficient algorithm duEDF, and up to 12\% energy saving over the non-DVS scheduling algorithm. However, if the device power is large compared to the CPU power, then we show that a DVS scheme does not result in lowest energy. Finally, a comparison of the performance of algorithms duSYS and duSYS\_PC show that preemption control has minimal effect on system-level energy reduction.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "17", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "DVS system; dynamic task scheduling; energy minimization; optimal scaling factor; real time", } @Article{Lee:2008:DFR, author = "Sheayun Lee and Insik Shin and Woonseok Kim and Insup Lee and Sang Lyul Min", title = "A design framework for real-time embedded systems with code size and energy constraints", journal = j-TECS, volume = "7", number = "2", pages = "18:1--18:??", month = feb, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1331331.1331342", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:22:00 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Real-time embedded systems are typically constrained in terms of three system performance criteria: space, time, and energy. The performance requirements are directly translated into constraints imposed on the system's resources, such as code size, execution time, and energy consumption. These resource constraints often interact or even conflict with each other in a complex manner, making it difficult for a system developer to apply a well-defined design methodology in developing a real-time embedded system. 
Motivated by this observation, we propose a design framework that can flexibly balance the tradeoff involving the system's code size, execution time, and energy consumption. Given a system specification and an optimization criteria, the proposed technique generates a set of design parameters in such a way that a system cost function is minimized while the given resource constraints are satisfied. Specifically, the technique derives code generation decision for each task so that a specific version of code is selected among a number of different ones that have distinct characteristics in terms of code size and execution time. In addition, the design framework determines the voltage/frequency setting for a variable voltage processor whose supply voltage can be adjusted at runtime in order to minimize the energy consumption while execution performance is degraded accordingly. The proposed technique formulates this design process as a constrained optimization problem. We show that this optimization problem is NP-hard and then provide a heuristic solution to it. We show that these seemingly conflicting design goals can be pursued by using a simple optimization algorithm that works with a single optimization criteria. Moreover, the optimization is driven by an abstract system specification given by the system developer, so that the system development process can be automated. The results from our simulation show that the proposed algorithm finds a solution that is close to the optimal one with the average error smaller than 1.0\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "18", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "code size; embedded; energy; real-time; scheduling", } @Article{Manolache:2008:TMP, author = "Sorin Manolache and Petru Eles and Zebo Peng", title = "Task mapping and priority assignment for soft real-time applications under deadline miss ratio constraints", journal = j-TECS, volume = "7", number = "2", pages = "19:1--19:??", month = feb, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1331331.1331343", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:22:00 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Both analysis and design optimisation of real-time systems has predominantly concentrated on considering hard real-time constraints. For a large class of applications, however, this is both unrealistic and leads to unnecessarily expensive implementations. This paper addresses the problem of task priority assignment and task mapping in the context of multiprocessor applications with stochastic execution times and in the presence of constraints on the percentage of missed deadlines. We propose a design space exploration strategy together with a fast method for system performance analysis. Experiments emphasize the efficiency of the proposed analysis method and optimisation heuristic in generating high-quality implementations of soft real-time systems with stochastic task execution times and constraints on deadline miss ratios.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "19", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "mapping; priority assignment; schedulability analysis; soft real-time systems; stochastic task execution times", } @Article{Park:2008:SRB, author = "Taejoon Park and Kang G. Shin", title = "Secure routing based on distributed key sharing in large-scale sensor networks", journal = j-TECS, volume = "7", number = "2", pages = "20:1--20:??", month = feb, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1331331.1331344", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:22:00 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Sensor networks, usually built with a large number of small, low-cost sensor nodes, are characterized by their large-scale and unattended deployment, necessitating ``secure'' communications between nearby, as well as remote, sensor nodes for their intended applications and services. Key setup/sharing is crucial to the protection of such applications/services from attacks, but existing (public-key, cluster-based, or pairwise) solutions become too expensive (hence, inefficient) when the underlying applications/services require communications between distant sensor nodes. To remedy this inefficiency, we propose a novel distributed key-sharing scheme, in which each participating sensor node shares unique keys with a small number of other sensor nodes---called distributed key servers (DKSs)---chosen according to their geographic distance and communication direction. Using DKSs, we develop two secure routing protocols: (1) secure geographic forwarding that delivers packets by using a chain of DKS lookups, each secured with its own key and forwarded geographically; and (2) key establishment that creates a secure session between two distant sensor nodes based solely on symmetric-ciphers. 
These protocols enable low-cost, low-power sensors to provide high-level security at a very low cost.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "20", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "attack tolerance; distributed key sharing and servers; key establishment; large-scale sensor networks; secure geographic forwarding", } @Article{Cho:2008:DNP, author = "Young H. Cho and William H. Mangione-Smith", title = "Deep network packet filter design for reconfigurable devices", journal = j-TECS, volume = "7", number = "2", pages = "21:1--21:??", month = feb, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1331331.1331345", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:22:00 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Most network routers and switches provide some protection against the network attacks. However, the rapidly increasing amount of damages reported over the past few years indicates the urgent need for tougher security. Deep-packet inspection is one of the solutions to capture packets that can not be identified using the traditional methods. It uses a list of signatures to scan the entire content of the packet, providing the means to filter harmful packets out of the network. Since one signature does not depend on the other, the filtering process has a high degree of parallelism. Most software and hardware deep-packet filters that are in use today execute the tasks under von Neumann architecture. Such architecture can not fully take advantage of the parallelism. For instance, one of the most widely used network intrusion-detection systems, Snort, configured with 845 patterns, running on a dual 1-GHz Pentium III system, can sustain a throughput of only 50 Mbps. 
The poor performance is because of the fact that the processor is programmed to execute several tasks sequentially instead of simultaneously. We designed scalable deep-packet filters on field-programmable gate arrays (FPGAs) to search for all data-independent patterns simultaneously. With FPGAs, we have the ability to reprogram the filter when there are any changes to the signature set. The smallest full-pattern matcher implementation for the latest Snort NIDS fits in a single 400k Xilinx FPGA (Spartan 3-XC3S400) with a sustained throughput of 1.6 Gbps. Given a larger FPGA, the design can scale linearly to support a greater number of patterns, as well as higher data throughput.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "21", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "firewall; network intrusion detection; string filter; virus; worm", } @Article{Pasricha:2008:FEB, author = "Sudeep Pasricha and Nikil Dutt and Mohamed Ben-Romdhane", title = "Fast exploration of bus-based communication architectures at the {CCATB} abstraction", journal = j-TECS, volume = "7", number = "2", pages = "22:1--22:??", month = feb, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1331331.1331346", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:22:00 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Currently, system-on-chip (SoC) designs are becoming increasingly complex, with more and more components being integrated into a single SoC design. Communication between these components is increasingly dominating critical system paths and frequently becomes the source of performance bottlenecks. It, therefore, becomes imperative for designers to explore the communication space early in the design flow. 
Traditionally, system designers have used Pin-Accurate Bus Cycle Accurate (PA-BCA) models for early communication space exploration. These models capture all of the bus signals and strictly maintain cycle accuracy, which is useful for reliable performance exploration but results in slow simulation speeds for complex designs, even when they are modeled using high-level languages. Recently, there have been several efforts to use the Transaction-Level Modeling (TLM) paradigm for improving simulation performance in BCA models. However, these transaction-based BCA (T-BCA) models capture a lot of details that can be eliminated when exploring communication architectures. In this paper, we extend the TLM approach and propose a new transaction-based modeling abstraction level (CCATB) to explore the communication design space. Our abstraction level bridges the gap between the TLM and BCA levels, and yields an average performance speedup of 120\% over PA-BCA and 67\% over T-BCA models, on average. The CCATB models are not only faster to simulate, but also extremely accurate and take less time to model compared to both T-BCA and PA-BCA models. We describe the mechanisms that produce the speedup in CCATB models and also analyze how the achieved simulation speedup scales with design complexity. To demonstrate the effectiveness of using CCATB for exploration, we present communication space exploration case studies from the broadband communication and multimedia application domains.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "22", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "communication architecture; on-chip bus; performance exploration; system-on-chip; transaction-level modeling", } @Article{DiNatale:2008:BOM, author = "Marco {Di Natale} and Valerio Pappalardo", title = "Buffer optimization in multitask implementations of {Simulink} models", journal = j-TECS, volume = "7", number = "3", pages = "23:1--23:??", month = apr, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1347375.1347376", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:22:21 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Automatic generation of a controller implementation from a synchronous reactive model is among the best practices for software development in the automotive and aeronautics industry, because of the possibility of simulation, model checking, and error-free implementation. This paper discusses an algorithm for optimizing the single-processor multitask implementation of Simulink models with real-time execution constraints, derived from the sampling rates of the functional blocks. Existing code generation tools enforce the addition of extra buffering and latencies whenever there is a rate transition among functional blocks. This work shows how timing analysis can be used to find the cases in which additional buffering and latency can be avoided, improving the space and time performance of the application. The proposed search algorithm allows finding a solution with reduced and possibly minimal use of buffering even for very high values of processor utilization.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "23", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "code generation; real-time programming; schedulability; software models", } @Article{Trajkovic:2008:ISA, author = "Jelena Trajkovic and Alexander V. Veidenbaum and Arun Kejariwal", title = "Improving {SDRAM} access energy efficiency for low-power embedded systems", journal = j-TECS, volume = "7", number = "3", pages = "24:1--24:??", month = apr, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1347375.1347377", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:22:21 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "DRAM (dynamic random-access memory) energy consumption in low-power embedded systems can be very high, exceeding that of the data cache or even that of the processor. This paper presents and evaluates a scheme for reducing the energy consumption of SDRAM (synchronous DRAM) memory access by a combination of techniques that take advantage of SDRAM energy efficiencies in bank and row access. This is achieved by using small, cachelike structures in the memory controller to prefetch an additional cache block(s) on SDRAM reads and to combine block writes to the same SDRAM row. The results quantify the SDRAM energy consumption of MiBench applications and demonstrate significant savings in SDRAM energy consumption, 23\%, on average, and reduction in the energy-delay product, 44\%, on average. The approach also improves performance: the CPI is reduced by 26\%, on average.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "24", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "embedded processors and low power; fetch buffer; SDRAM; write-combining buffer", } @Article{Varma:2008:AFS, author = "Ankush Varma and Eric Debes and Igor Kozintsev and Paul Klein and Bruce Jacob", title = "Accurate and fast system-level power modeling: an {XScale}-based case study", journal = j-TECS, volume = "7", number = "3", pages = "25:1--25:??", month = apr, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1347375.1347378", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:22:21 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Accurate and fast system modeling is central to the rapid design space exploration needed for embedded-system design. With fast, complex SoCs playing a central role in such systems, system designers have come to require MIPS-range simulation speeds and near-cycle accuracy. The sophisticated simulation frameworks that have been developed for high-speed system performance modeling do not address power consumption, although it is a key design constraint. In this paper, we define a simulation-based methodology for extending system performance modeling frameworks to also include power modeling. We demonstrate the use of this methodology with a case study of a real, complex embedded system, comprising the Intel XScale embedded microprocessor, its WMMX SIMD coprocessor, L1 caches, SDRAM, and the on-board address and data buses. We describe detailed power models for each of these components and validate them against physical measurements from hardware, demonstrating that such frameworks enable designers to model both power and performance at high speeds without sacrificing accuracy. 
Our results indicate that the power estimates obtained are accurate within 5\% of physical measurements from hardware, while simulation speeds consistently exceed a million instructions per second (MIPS).", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "25", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "embedded systems; power modeling; SystemC", } @Article{Aamodt:2008:CTI, author = "Tor M. Aamodt and Paul Chow", title = "Compile-time and instruction-set methods for improving floating- to fixed-point conversion accuracy", journal = j-TECS, volume = "7", number = "3", pages = "26:1--26:??", month = apr, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1347375.1347379", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:22:21 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This paper proposes and evaluates compile time and instruction-set techniques for improving the accuracy of signal-processing algorithms run on fixed-point embedded processors. These techniques are proposed in the context of a profile guided floating- to fixed-point compiler-based conversion process. A novel fixed-point scaling algorithm (IRP) is introduced that exploits correlations between values in a program by applying fixed-point scaling, retaining as much precision as possible without causing overflow. This approach is extended into a more aggressive scaling algorithm (IRP-SA) by leveraging the modulo nature of 2's complement addition and subtraction to discard most significant bits that may not be redundant sign-extension bits. A complementary scaling technique (IDS) is then proposed that enables the fixed-point scaling of a variable to be parameterized, depending upon the context of its definitions and uses. 
Finally, a novel instruction-set enhancement---fractional multiplication with internal left shift (FMLS)---is proposed to further leverage interoperand correlations uncovered by the IRP-SA scaling algorithm. FMLS preserves a different subset of the full product's bits than traditional fractional fixed-point or integer multiplication. On average, FMLS combined with IRP-SA improves accuracy on processors with uniform bitwidth register architectures by the equivalent of 0.61 bits of additional precision for a set of signal-processing benchmarks (up to 2 bits). Even without employing FMLS, the IRP-SA scaling algorithm achieves additional accuracy over two previous fixed-point scaling algorithms by averages of 1.71 and 0.49 bits. Furthermore, as FMLS combines multiplication with a scaling shift, it reduces execution time by an average of 9.8\%. An implementation of IDS, specialized to single-nested loops, is found to improve accuracy of a lattice filter benchmark by the equivalent of more than 16-bits of precision.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "26", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "compilation; digital signal processing; fixed-point; fractional multiplication; scaling; signal-to-noise ratio", } @Article{Fei:2008:EAF, author = "Yunsi Fei and Lin Zhong and Niraj K. 
Jha", title = "An energy-aware framework for dynamic software management in mobile computing systems", journal = j-TECS, volume = "7", number = "3", pages = "27:1--27:??", month = apr, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1347375.1347380", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:22:21 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Energy efficiency has become a very important and challenging issue for resource-constrained mobile computers. In this article, we propose a novel dynamic software management (DSOM) framework to improve battery utilization. We have designed and implemented a DSOM module in user space, independent of the operating system (OS), which explores quality-of-service (QoS) adaptation to reduce system energy and employs a priority-based preemption policy for multiple applications to avoid competition for limited energy resources. Software energy macromodels for mobile applications are employed to predict energy demand at each QoS level, so that the DSOM module is able to select the best possible trade-off between energy conservation and application QoS; it also honors the priority desired by the user. Our experimental results for some mobile applications (video player, speech recognizer, voice-over-IP) show that this approach can meet user-specified task-oriented goals and significantly improve battery utilization.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "27", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "energy macromodel; runtime coordination; software adaptation", } @Article{Zhong:2008:SWE, author = "Xiliang Zhong and Cheng-Zhong Xu", title = "System-wide energy minimization for real-time tasks: {Lower} bound and approximation", journal = j-TECS, volume = "7", number = "3", pages = "28:1--28:??", month = apr, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1347375.1347381", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:22:21 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We present a dynamic voltage scaling (DVS) technique that minimizes system-wide energy consumption for both periodic and sporadic tasks. It is known that a system consists of processors and a number of other components. Energy-aware processors can be run in different speed levels; components like memory and I/O subsystems and network interface cards can be in a standby state when they are active, but idle. Processor energy optimization solutions are not necessarily efficient from the perspective of systems. Current system-wide energy optimization studies are often limited to periodic tasks with heuristics in getting approximated solutions. In this paper, we develop an exact dynamic programming algorithm for periodic tasks on processors with practical discrete speed levels. The algorithm determines the lower bound of energy expenditure in pseudopolynomial time. An approximation algorithm is proposed to provide performance guarantee with a given bound in polynomial running time. Because of their time efficiency, both the optimization and approximation algorithms can be adapted for online scheduling of sporadic tasks with irregular task releases. 
We prove that system-wide energy optimization for sporadic tasks is NP-hard in the strong sense. We develop (pseudo-) polynomial-time solutions by exploiting its inherent properties.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "28", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "dynamic power management; dynamic voltage scaling; power-aware scheduling; real-time systems", } @Article{Zhou:2008:CIA, author = "Ye Zhou and Edward A. Lee", title = "Causality interfaces for actor networks", journal = j-TECS, volume = "7", number = "3", pages = "29:1--29:??", month = apr, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1347375.1347382", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:22:21 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We consider concurrent models of computation where ``actors'' (components that are in charge of their own actions) communicate by exchanging messages. The interfaces of actors principally consist of ``ports,'' which mediate the exchange of messages. Actor-oriented architectures contrast with and complement object-oriented models by emphasizing the exchange of data between concurrent components rather than transformation of state. Examples of such models of computation include the classical actor model, synchronous languages, data-flow models, process networks, and discrete-event models. Many experimental and production languages used to design embedded systems are actor oriented and based on one of these models of computation. Many of these models of computation benefit considerably from having access to causality information about the components. This paper augments the interfaces of such components to include such causality information. 
It shows how this causality information can be algebraically composed so that compositions of components acquire causality interfaces that are inferred from their components and the interconnections. We illustrate the use of these causality interfaces to statically analyze timed models and synchronous language compositions for causality loops and data-flow models for deadlock. We also show that causality analysis for each communication cycle can be performed independently and in parallel, and it is only necessary to analyze one port for each cycle. Finally, we give a conservative approximation technique for handling dynamically changing causality properties.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "29", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "actors; behavioral types; causality; data flow; deadlock; discrete-event models; interfaces; synchronous languages; timed systems", } @Article{Shin:2008:CRT, author = "Insik Shin and Insup Lee", title = "Compositional real-time scheduling framework with periodic model", journal = j-TECS, volume = "7", number = "3", pages = "30:1--30:??", month = apr, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1347375.1347383", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:22:21 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "It is desirable to develop large complex systems using components based on systematic abstraction and composition. Our goal is to develop a compositional real-time scheduling framework to support abstraction and composition techniques for real-time aspects of components. In this paper, we present a formal description of compositional real-time scheduling problems, which are the component abstraction and composition problems. 
We identify issues that need be addressed by solutions and provide our framework for the solutions, which is based on the periodic interface. Specifically, we introduce the periodic resource model to characterize resource allocations provided to a single component. We present exact schedulability conditions for the standard Liu and Layland periodic task model and the proposed periodic resource model under EDF and RM scheduling, and we show that the component abstraction and composition problems can be addressed with periodic interfaces through the exact schedulability conditions. We also provide the utilization bounds of a periodic task set over the periodic resource model and the abstraction bounds of periodic interfaces for a periodic task set under EDF and RM scheduling. We finally present the analytical bounds of overheads that our solution incurs in terms of resource utilization increase and evaluate the overheads through simulations.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "30", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "abstract; component; composition; hierarchical; interface; real-time; scheduling", } @Article{Voyiatzis:2008:SFS, author = "Artemios G. Voyiatzis and Dimitrios N. Serpanos", title = "The security of the {Fiat--Shamir} scheme in the presence of transient hardware faults", journal = j-TECS, volume = "7", number = "3", pages = "31:1--31:??", month = apr, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1347375.1347384", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:22:21 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Implementation cryptanalysis has emerged as a realistic threat for cryptographic systems. It consists of two classes of attacks: fault-injection and side-channel attacks. 
In this work, we examine the resistance of the Fiat--Shamir scheme to fault-injection attacks, since Fiat--Shamir is a popular scheme for ``light'' consumer devices, such as smartcards, in a wide range of consumer services. We prove that an existing attack, known as the Bellcore attack, is incomplete. We propose an extension to the protocol that proactively secures Fiat--Shamir systems from the Bellcore attack and we prove its strength. Finally, we introduce a new attack model, which, under stronger assumptions, can derive the secret keys from both the original Fiat--Shamir scheme as well as its proposed extension. Our approach demonstrates that countermeasures for implementation cryptanalysis must be carefully designed and that deployed systems must include appropriate protection mechanisms for all known attacks and be flexible enough to incorporate countermeasures for new ones.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "31", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "Bellcore attack; cryptography; Fiat--Shamir identification scheme; side-channel attacks; smartcards", } @Article{Gurun:2008:NGP, author = "Selim Gurun and Chandra Krintz and Rich Wolski", title = "{NWSLite}: a general-purpose, nonparametric prediction utility for embedded systems", journal = j-TECS, volume = "7", number = "3", pages = "32:1--32:??", month = apr, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1347375.1347385", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:22:21 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Time series-based prediction methods have a wide range of uses in embedded systems. Many OS algorithms and applications require accurate prediction of demand and supply of resources. 
However, configuring prediction algorithms is not easy, since the dynamics of the underlying data requires continuous observation of the prediction error and dynamic adaptation of the parameters to achieve high accuracy. Current prediction methods are either too costly to implement on resource-constrained devices or their parameterization is static, making them inappropriate and inaccurate for a wide range of datasets. This paper presents NWSLite, a prediction utility that addresses these shortcomings on resource-restricted platforms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "32", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "computation offloading; CPU availability estimation; embedded systems; network performance estimation; prediction algorithms", } @Article{Yan:2008:DOD, author = "Ting Yan and Yu Gu and Tian He and John A. Stankovic", title = "Design and optimization of distributed sensing coverage in wireless sensor networks", journal = j-TECS, volume = "7", number = "3", pages = "33:1--33:??", month = apr, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1347375.1347386", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:22:21 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "For many sensor network applications, such as military surveillance, it is necessary to provide full sensing coverage to a security-sensitive area while, at the same time, minimizing energy consumption and extending system lifetime by leveraging the redundant deployment of sensor nodes. In this paper, we propose a surveillance service for sensor networks based on a distributed energy-efficient sensing coverage protocol. 
In the protocol, each node is able to dynamically decide a schedule for itself to guarantee a certain degree-of-coverage (DOC) with average energy consumption inversely proportional to the node density. Several optimizations and extensions are proposed to enhance the basic design with a better load-balance feature and a longer network lifetime. We consider and address the impact of the target size and the unbalanced initial energy capacity of individual nodes to the network lifetime. Several practical issues such as the localization error, irregular sensing range, and unreliable communication links are addressed as well. Simulation shows that our protocol extends system lifetime significantly with low energy consumption. It outperforms other state-of-the-art schemes by as much as 50\% reduction in energy consumption and as much as 130\% increase in the half-life of the network.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "33", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "energy conservation; sensing coverage; sensor networks", } @Article{Ozer:2008:SBE, author = "Emre {\"O}zer and Andy P.
Nisbet and David Gregg", title = "A stochastic bitwidth estimation technique for compact and low-power custom processors", journal = j-TECS, volume = "7", number = "3", pages = "34:1--34:??", month = apr, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1347375.1347387", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:22:21 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "There is an increasing trend toward compiling from C to custom hardware for designing embedded systems in which the area and power consumption of application-specific functional units, registers, and memory blocks are heavily dependent on the bit-widths of integer operands used in computations. The actual bit-width required to store the values assigned to an integer variable during the execution of a program will not, in general, match the built-in C data types. Thus, precious area is wasted if the built-in data type sizes are used to declare the size of integer operands. In this paper, we introduce stochastic bit-width estimation that follows a simulation-based probabilistic approach to estimate the bit-widths of integer variables using extreme value theory. The estimation technique is also empirically compared to two compile-time integer bit-width analysis techniques. Our experimental results show that the stochastic bit-width estimation technique dramatically reduces integer bit-widths and, therefore, enables more compact and power-efficient custom hardware designs than the compile-time integer bit-width analysis techniques. Up to 37\% reduction in custom hardware area and 30\% reduction in logic power consumption using stochastic bit-width estimation can be attained over ten integer applications implemented on an FPGA chip.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "34", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "bit-width analysis; custom hardware; extreme value theory; FPGA; statistical estimation", } @Article{Kumar:2008:CCP, author = "Rajeev Kumar and Dipankar Das", title = "Code compression for performance enhancement of variable-length embedded processors", journal = j-TECS, volume = "7", number = "3", pages = "35:1--35:??", month = apr, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1347375.1347388", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:22:21 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Most of the work done in the field of code compression pertains to processors with fixed-length instruction encoding. The design of a code-compression scheme for variable-length instruction encodings poses newer design challenges. In this work, we first investigate the scope for code compression on variable-length instruction-set processors whose encodings are already optimized to a certain extent with respect to their usage. For such ISAs instruction boundaries are not known prior to decoding. Another challenging task of designing a code-compression scheme for such ISAs is designing the decompression hardware, which must decompress code postcache so that we gain in performance. We present two dictionary-based code compression schemes. The first algorithm uses a bit-vector; the second one uses reserved instructions to identify code words. We design additional logic for each of the schemes to decompress the code on-the-fly. We test the two algorithms with a variable-length RISC processor. We provide a detailed experimental analysis of the empirical results obtained by extensive simulation-based design space exploration for this system. 
The optimized decompressor can now execute compressed program faster than the native program. The experiments demonstrate reduction in code size (up to 30\%), speed-up (up to 15\%), and bus-switching activity (up to 20\%). We also implement one decompressor in a hardware description language and synthesize it to illustrate the small overheads associated with the proposed approach.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "35", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "bus switching; code compression; code decompression; embedded systems; instruction memory; RISC processor; variable-length ISAs", } @Article{Wilhelm:2008:WCE, author = "Reinhard Wilhelm and Jakob Engblom and Andreas Ermedahl and Niklas Holsti and Stephan Thesing and David Whalley and Guillem Bernat and Christian Ferdinand and Reinhold Heckmann and Tulika Mitra and Frank Mueller and Isabelle Puaut and Peter Puschner and Jan Staschulat and Per Stenstr{\"o}m", title = "The worst-case execution-time problem---overview of methods and survey of tools", journal = j-TECS, volume = "7", number = "3", pages = "36:1--36:??", month = apr, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1347375.1347389", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 12 15:22:21 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The determination of upper bounds on execution times, commonly called worst-case execution times (WCETs), is a necessary step in the development and validation process for hard real-time systems. This problem is hard if the underlying processor architecture has components, such as caches, pipelines, branch prediction, and other speculative components. 
This article describes different approaches to this problem and surveys several commercially available tools and research prototypes.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "36", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "hard real time; worst-case execution times", } @Article{Hessell:2008:EES, author = "Fabiano Hessell and Kenneth Kent and Dionisios Pnevmatikatos", title = "Editorial: {Embedded} systems --- new challenges and future directions", journal = j-TECS, volume = "7", number = "4", pages = "37:1--37:??", month = jul, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1376804.1376805", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Aug 5 19:32:59 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "37", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Park:2008:RFF, author = "Chanik Park and Wonmoon Cheon and Jeonguk Kang and Kangho Roh and Wonhee Cho and Jin-Soo Kim", title = "A reconfigurable {FTL} (flash translation layer) architecture for {NAND} flash-based applications", journal = j-TECS, volume = "7", number = "4", pages = "38:1--38:??", month = jul, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1376804.1376806", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Aug 5 19:32:59 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this article, a novel FTL (flash translation layer) architecture is proposed for NAND flash-based applications such as MP3 players, DSCs (digital still cameras) and SSDs (solid-state drives).
Although the basic function of an FTL is to translate a logical sector address to a physical sector address in flash memory, efficient algorithms of an FTL have a significant impact on performance as well as the lifetime. After the dominant parameters that affect the performance and endurance are categorized, the design space of the FTL architecture is explored based on a diverse workload analysis. With the proposed FTL architectural framework, it is possible to decide which configuration of FTL mapping parameters yields the best performance, depending on the differing characteristics of various NAND flash-based applications.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "38", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "Flash memory; FTL; performance analysis; reconfigurable architecture", } @Article{Popovici:2008:PBS, author = "Katalin Popovici and Xavier Guerin and Frederic Rousseau and Pier Stanislao Paolucci and Ahmed Amine Jerraya", title = "Platform-based software design flow for heterogeneous {MPSoC}", journal = j-TECS, volume = "7", number = "4", pages = "39:1--39:??", month = jul, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1376804.1376807", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Aug 5 19:32:59 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Current multimedia applications demand complex heterogeneous multiprocessor architectures with specific communication infrastructure in order to achieve the required performances. Programming these architectures usually results in writing separate low-level code for the different processors (DSP, microcontroller), implying late global validation of the overall application with the hardware platform. 
We propose a platform-based software design flow able to efficiently use the resources of the architecture and allowing easy experimentation of several mappings of the application onto the platform resources. We use a high-level environment to capture both application and architecture initial representations. An executable software stack is generated automatically for each processor from the initial model. The software generation and validation is performed gradually corresponding to different software abstraction levels. Specific software development platforms (abstract models of the architecture) are generated and used to allow debugging of the different software components with explicit hardware-software interaction. We applied this approach on a multimedia platform, involving a high performance DSP and a RISC processor, to explore communication architecture and generate an efficient executable code for a multimedia application. Based on automatic tools, the proposed flow increases productivity and preserves design quality.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "39", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "multimedia; Multiprocessor system-on chip; programming environment; Simulink; software design; SystemC; transaction level modeling", } @Article{Chattopadhyay:2008:PPA, author = "A. Chattopadhyay and H. Ishebabi and X. Chen and Z. Rakosi and K. Karuri and D. Kammler and R. Leupers and G. Ascheid and H. 
Meyr", title = "Prefabrication and postfabrication architecture exploration for partially reconfigurable {VLIW} processors", journal = j-TECS, volume = "7", number = "4", pages = "40:1--40:??", month = jul, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1376804.1376808", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Aug 5 19:32:59 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Modern application-specific instruction-set processors (ASIPs) face the daunting task of delivering high performance for a wide range of applications. For enhancing the performance, architectural features, for example, pipelining, VLIW, are often employed in ASIPs, leading to high design complexity. Integrated ASIP design environments, like template-based approaches and language-driven approaches, provide an answer to this growing design complexity. At the same time, increasing hardware design costs have motivated the processor designers to introduce high flexibility in the processor. Flexibility, in its most effective form, can be introduced to the ASIP by coupling a reconfigurable unit to the base processor. Because of its obvious benefits, several reconfigurable ASIPs (rASIPs) have been designed for years. This design paradigm gained momentum with the advent of coarse-grained FPGAs, where the lack of domain-specific performance common in general-purpose FPGAs are largely overcome by choosing application-dependent basic functional units. These rASIP designs lack a generic flow from high-level specification, resulting in intuitive design decisions and hard-to-retarget processor design tools. Although partial, template-based approaches for rASIP design is existent, a clear design methodology especially for the prefabrication architecture exploration is not present. 
In order to address this issue, a high-level specification and design methodology for partially reconfigurable VLIW processors is proposed in this article. To show the benefit of this approach, a commercial VLIW processor is used as the base architecture and two domains of applications are studied for potential performance gain.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "40", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "ASIP; coarse-grained FPGA; VLIW", } @Article{Lin:2008:MAC, author = "Yi-Neng Lin and Ying-Dar Lin and Yuan-Cheng Lai and Kuo-Kun Tseng", title = "Modeling and analysis of core-centric network processors", journal = j-TECS, volume = "7", number = "4", pages = "41:1--41:??", month = jul, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1376804.1376809", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Aug 5 19:32:59 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Network processors can be categorized into two types, the coprocessors-centric model in which data-plane is handled by coprocessors, and the core-centric model in which the core processes most of the data-plane packets yet offloading some tasks to coprocessors. While the former has been properly explored over various applications, research regarding the latter remain limited. Based on the previous experience of prototyping the virtual private network (VPN) over the IXP425 network processor, this work aims to derive design implications for the core-centric model performing computational intensive applications. From system and IC vendors' perspectives, the continuous-time Markov chain and Petri net simulations are adopted to explore this architecture. Analytical results prove to be quite inline with those of the simulation and implementation. 
With subsequent investigation, we find that appropriate process run lengths can improve the effective core utilization by 2.26 times, and by offloading the throughput boosts 7.5 times. The results also suggest single-process programming, since context-switch overhead impacts considerably on the performance.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "41", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "core-centric; embedded system; modeling; Network processor; simulation", } @Article{Get:2008:PFE, author = "Jerome Hugues and Bechir Zalila and Laurent Pautet and Fabrice Kordon", title = "From the prototype to the final embedded system using the {Ocarina AADL} tool suite", journal = j-TECS, volume = "7", number = "4", pages = "42:1--42:??", month = jul, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1376804.1376810", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Aug 5 19:32:59 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Building distributed real-time embedded systems requires a stringent methodology, from early requirement capture to full implementation. However, there is a strong link between the requirements and the final implementation (e.g., scheduling and resource dimensioning). Therefore, a rapid prototyping process based on automation of tedious and error-prone tasks (analysis and code generation) is required to speed up the development cycle. In this article, we show how the AADL ({\em Architecture Analysis and Design Language\/}), which appeared in late 2004, helps solve these issues thanks to a dedicated tool suite. We then detail the prototyping process and its current implementation: Ocarina.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "42", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "AADL; distributed; DRE; embedded; Ocarina; PolyORB-HI; real-time", } @Article{Benveniste:2008:CHR, author = "Albert Benveniste and Beno{\^\i}t Caillaud and Luca P. Carloni and Paul Caspi and Alberto L. Sangiovanni-Vincentelli", title = "Composing heterogeneous reactive systems", journal = j-TECS, volume = "7", number = "4", pages = "43:1--43:??", month = jul, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1376804.1376811", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Aug 5 19:32:59 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We present a compositional theory of heterogeneous reactive systems. The approach is based on the concept of tags marking the events of the signals of a system. Tags can be used for multiple purposes from indexing evolution in time (time stamping) to expressing relations among signals, like coordination (e.g., synchrony and asynchrony) and causal dependencies. The theory provides flexibility in system modeling because it can be used both as a unifying mathematical framework to relate heterogeneous models of computations and as a formal vehicle to implement complex systems by combining heterogeneous components. In particular, we introduce an algebra of tag structures to define heterogeneous parallel composition formally. Morphisms between tag structures are used to define relationships between heterogeneous models at different levels of abstraction. In particular, they can be used to represent design transformations from tightly synchronized specifications to loosely-synchronized implementations. The theory has an important application in the correct-by-construction deployment of synchronous design on distributed architectures.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Embed. Comput. Syst.", articleno = "43", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "Compositionality; correct-by-construction design; GALS; models of computation; reactive systems", } @Article{Gebotys:2008:EAW, author = "Catherine H. Gebotys and Brian A. White", title = "{EM} analysis of a wireless {Java}-based {PDA}", journal = j-TECS, volume = "7", number = "4", pages = "44:1--44:??", month = jul, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1376804.1376812", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Aug 5 19:32:59 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The susceptibility of wireless portable devices to electromagnetic (EM) attacks is largely unknown. If analysis of electromagnetic (EM) waves emanating from the wireless device during a cryptographic computation do leak sufficient information, it may be possible for an attacker to reconstruct the secret key. Possession of the secret cryptographic key would render all future wireless communications insecure and cause further potential problems, such as identity theft. Despite the complexities of a PDA wireless device, such as operating system events, interrupts, cache misses, and other interfering events, this article demonstrates that, for the first time, repeatable EM differential attacks are possible. The proposed differential analysis methodology involves precharacterization of the PDA device (thresholding and pattern recognition), and a new frequency-based differential analysis. Unlike previous research, the new methodology does not require perfect alignment of EM frames and is repeatable in the presence of a complex embedded system (including cache misses, operating system events, etc), thus supporting attacks on real embedded systems. 
This research is important for future wireless embedded systems, which will increasingly demand higher levels of security.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "44", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "countermeasures; EM analysis; power attacks; Side-channel analysis", } @Article{Ayav:2008:IFT, author = "Tolga Ayav and Pascal Fradet and Alain Girault", title = "Implementing fault-tolerance in real-time programs by automatic program transformations", journal = j-TECS, volume = "7", number = "4", pages = "45:1--45:??", month = jul, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1376804.1376813", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Aug 5 19:32:59 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We present a formal approach to implement fault-tolerance in real-time embedded systems. The initial fault-intolerant system consists of a set of independent periodic tasks scheduled onto a set of fail-silent processors connected by a reliable communication network. We transform the tasks such that, assuming the availability of an additional spare processor, the system tolerates one failure at a time (transient or permanent). Failure detection is implemented using heartbeating, and failure masking using checkpointing and rollback. These techniques are described and implemented by automatic program transformations on the tasks' programs. The proposed formal approach to fault-tolerance by program transformations highlights the benefits of separation of concerns. It allows us to establish correctness properties and to compute optimal values of parameters to minimize fault-tolerance overhead. 
We also present an implementation of our method, to demonstrate its feasibility and its efficiency.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "45", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "checkpointing; correctness proofs; Fault-tolerance; heartbeating; program transformations", } @Article{Middha:2008:MMS, author = "Bhuvan Middha and Matthew Simpson and Rajeev Barua", title = "{MTSS}: {Multitask} stack sharing for embedded systems", journal = j-TECS, volume = "7", number = "4", pages = "46:1--46:??", month = jul, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1376804.1376814", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Aug 5 19:32:59 MDT 2008", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Out-of-memory errors are a serious source of unreliability in most embedded systems. Applications run out of main memory because of the frequent difficulty of estimating the memory requirement before deployment, either because it depends on input data, or because certain language features prevent estimation. The typical lack of disks and virtual memory in embedded systems has a serious consequence when an out-of-memory error occurs. Without swap space, the system crashes if its memory footprint exceeds the available memory by even 1 byte. This work improves reliability for multitasking embedded systems by proposing MTSS, a multitask stack sharing technique. If a task attempts to overflow the bounds of its allocated stack space, MTSS grows its stack into the stack memory space allocated for other tasks. This technique can avoid the out-of-memory error if the extra space recovered is sufficient to complete execution. 
Experiments show that MTSS is able to recover an average of 54\% of the stack space allocated to the overflowing task in the free space of other tasks. In addition, unlike conventional systems, MTSS detects memory overflows, allowing the possibility of remedial action or a graceful exit if the recovered space is not enough. Alternatively, MTSS can be used for decreasing the required physical memory of an embedded system by reducing the initial memory allocated to each of the tasks and recovering the deficit by sharing stack with other tasks. The overheads of MTSS are low: the runtime and energy overheads are 3.1\% and 3.2\%, on average. These are tolerable given that reliability is the most important concern in virtually all systems, ahead of other concerns, such as runtime and energy.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "46", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "cactus stack; data compression; heap overflow; meshed stack; Out-of-memory errors; reliability; reuse; runtime checks; stack overflow; virtual memory", } @Article{Inoue:2008:FAC, author = "Hiroaki Inoue and Junji Sakai and Sunao Torii and Masato Edahiro", title = "{FIDES}: an advanced chip multiprocessor platform for secure next generation mobile terminals", journal = j-TECS, volume = "8", number = "1", pages = "1:1--1:??", month = dec, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1457246.1457247", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 6 14:36:01 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We propose a secure platform on a chip multiprocessor, FIDES, in order to enable next generation mobile terminals to execute downloaded native applications for Linux. 
Its most important feature is the higher security based on multigrained separation mechanisms. Four new technologies support the FIDES platform: bus filter logic, XIP kernels, policy separation, and dynamic access control. With these technologies, the FIDES platform can tolerate both application-level and kernel-level bugs on an actual download subsystem. Thus, the best-suited platform to secure next generation mobile terminals is FIDES.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "1", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "chip multiprocessor; Secure mobile terminal; SELinux", } @Article{Park:2008:ATL, author = "Taejoon Park and Kang G. Shin", title = "Attack-tolerant localization via iterative verification of locations in sensor networks", journal = j-TECS, volume = "8", number = "1", pages = "2:1--2:??", month = dec, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1457246.1457248", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 6 14:36:01 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In sensor networks, secure localization --- determining sensors' locations in a hostile, untrusted environment --- is a challenging, but very important, problem that has not yet been addressed effectively. This paper presents an attack-tolerant localization protocol, called {\em Verification for Iterative Localization\/} (VeIL), under which sensors cooperatively safeguard the localization service. By exploiting the high spatiotemporal correlation existing between adjacent nodes, VeIL realizes (a) adaptive management of a profile for normal localization behavior, and (b) distributed detection of false locations advertised by attackers by comparing them against the profile of normal behavior. 
Our analysis and simulation results show that VeIL achieves high-level tolerance to many critical attacks, and is computationally feasible on resource-limited sensors.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "2", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "Anomaly detection; attack-tolerance; localization; recursive least squares; sensor networks", } @Article{Mitra:2008:VAD, author = "Sayan Mitra and Daniel Liberzon and Nancy Lynch", title = "Verifying average dwell time of hybrid systems", journal = j-TECS, volume = "8", number = "1", pages = "3:1--3:??", month = dec, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1457246.1457249", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 6 14:36:01 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Average dwell time (ADT) properties characterize the rate at which a hybrid system performs mode switches. In this article, we present a set of techniques for verifying ADT properties. The stability of a hybrid system A can be verified by combining these techniques with standard methods for checking stability of the individual modes of A.\par We introduce a new type of simulation relation for hybrid automata --- {\em switching simulation\/} --- for establishing that a given automaton A switches more rapidly than another automaton B. We show that the question of whether a given hybrid automaton has ADT $ \tau_a $ can be answered either by checking an invariant or by solving an optimization problem. For classes of hybrid automata for which invariants can be checked automatically, the invariant-based method yields an automatic method for verifying ADT; for automata that are outside this class, the invariant has to be checked using inductive techniques.
The optimization-based method is automatic and is applicable to a restricted class of initialized hybrid automata. A solution of the optimization problem either gives a counterexample execution that violates the ADT property, or it confirms that the automaton indeed satisfies the property. The optimization and the invariant-based methods can be used in combination to find the unknown ADT of a given hybrid automaton.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "3", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "Hybrid systems; optimization-based verification; simulation relation", } @Article{Schirner:2008:QAS, author = "Gunar Schirner and Rainer D{\"o}mer", title = "Quantitative analysis of the speed\slash accuracy trade-off in transaction level modeling", journal = j-TECS, volume = "8", number = "1", pages = "4:1--4:??", month = dec, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1457246.1457250", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 6 14:36:01 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The increasing complexity of embedded systems requires modeling at higher levels of abstraction. Transaction level modeling (TLM) has been proposed to abstract communication for high-speed system simulation and rapid design space exploration. Although being widely accepted for its high performance and efficiency, TLM often exhibits a significant loss in model accuracy.\par In this article, we systematically analyze and quantify the speed/accuracy trade-off in TLM. 
To this end, we provide a classification of TLM abstraction levels based on model granularity and define appropriate metrics and test setups to quantitatively measure and compare the performance and accuracy of such models.\par Addressing several classes of embedded communication protocols, we apply our analysis to three common bus architectures, the industry-standard AMBA advanced high-performance bus (AHB) as an on-chip parallel bus, the controller area network (CAN) as an off-chip serial bus, and the Motorola ColdFire Master Bus as an example for a custom embedded processor bus.\par Based on the analysis of these individual busses, we then generalize our results for a broader conclusion. The general TLM trade-off offers gains of up to four orders of magnitude in simulation speed, generally however, at the price of low accuracy. We conclude further that model granularity is the key to efficient TLM abstraction, and we identify conditions for accuracy of abstract models. As a result, this article provides general guidelines that allow the system designer to navigate the TLM trade-off effectively and choose the most suitable model for the given application with fast and accurate results.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "4", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "System level design; system-on-chip; transaction level modeling", } @Article{Zhou:2008:DAT, author = "Xiangrong Zhou and Peter Petrov", title = "Direct address translation for virtual memory in energy-efficient embedded systems", journal = j-TECS, volume = "8", number = "1", pages = "5:1--5:??", month = dec, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1457246.1457251", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 6 14:36:01 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article presents a methodology for virtual memory support in energy-efficient embedded systems. A holistic approach is proposed, where the combined efforts of compiler, operating system, and hardware architecture achieve a significant system power reductions. The application information extracted and analyzed by the compiler is utilized dynamically by the microarchitecture and the operating system to perform energy-efficient and, for many memory references, time-deterministic address translations. We demonstrate that by using application information regarding virtual memory layout, an efficient and conflict-free translation process can be implemented through the utilization of a small hardware direct translation table (DTT) accessed in an application-specific manner. The set of virtual pages is partitioned into groups, such that for each group only a few of the least significant bits are used as an index to obtain the physical page number. We outline an efficient compile-time algorithm for identifying these groups and allocate their translation entries optimally into the DTT. The introduced hardware is minimal in terms of area, performance, and power overhead, while offering the flexibility of software programmability. 
This is achieved through a small set of registers and tables, which are made software accessible. We have quantitatively evaluated the proposed methodology on a number of embedded applications, including voice, image, and video processing.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "5", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "address translation; Low-power embedded systems; virtual memory", } @Article{Park:2008:QSL, author = "Jiyong Park and Jaesoo Lee and Saehwa Kim and Seongsoo Hong", title = "Quasistatic shared libraries and {XIP} for memory footprint reduction in {MMU}-less embedded systems", journal = j-TECS, volume = "8", number = "1", pages = "6:1--6:??", month = dec, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1457246.1457252", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 6 14:36:01 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Despite a rapid decrease in the price of solid state memory devices, system memory is still a very precious resource in embedded systems. The use of shared libraries and execution-in-place (XIP) is known to be effective in significantly reducing memory usage. Unfortunately, many resource-constrained embedded systems lack an MMU, making it extremely difficult to support these techniques. To address this problem, we propose a novel shared library technique called a quasi-static shared library and an XIP, both based on our enhanced position independent code technique. In our quasistatic shared libraries, global symbols are bound to pseudoaddresses at linking time and actual physical addresses are bound at loading time. 
Unlike conventional shared libraries, they do not require symbol tables that take up valuable memory space and, therefore, allow for expedited address translation at runtime. Our XIP technique is facilitated by our enhanced position independent code where a data section can be arbitrarily located. Both the shared library and XIP techniques are made possible by emulating an MMU's memory mapping feature with a data section base register (DSBR) and a data section base table (DSBT).\par We have implemented these proposed techniques in a commercial ADSL (Asymmetric Digital Subscriber Line) home network gateway equipped with an MMU-less ARM7TDMI processor core, 2MB flash memory, and 16MB RAM. We measured its memory usage and evaluated its performance overhead by conducting a series of experiments. These experiments clearly demonstrate the effectiveness of our techniques in reducing memory usage. The results are impressive: 35\% reduction in flash memory usage when using only the shared library and 30\% reduction in RAM usage when using the shared library and XIP together. These results were achieved with only a negligible performance penalty of less than 4\%. Even though these techniques were applied to uClinux-based embedded systems, they can be used for any MMU-less real-time operating system.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "6", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "Embedded systems; memory footprint reduction; MMU-less; quasi-static linking; shared library", } @Article{Yan:2008:AWC, author = "Jun Yan and Wei Zhang", title = "Analyzing the worst-case execution time for instruction caches with prefetching", journal = j-TECS, volume = "8", number = "1", pages = "7:1--7:??", month = dec, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1457246.1457253", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 6 14:36:01 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Time predictability is one of the most important design considerations for real-time systems. In this article, we study the impact of instruction prefetching on the worst-case performance of instruction caches. We extend the static cache simulation technique to model and compute the worst-case instruction cache performance with prefetching. The evaluation results show that instruction prefetching can benefit both the average-case and worst-case performance; however, the degree of the worst-case performance improvement due to instruction prefetching is less than that of the average-case performance. As a result, the time variation of computing is increased by instruction prefetching. Also, our experimental results indicate that the prefetching distance can significantly impact the worst-case performance of instruction caches with instruction prefetching. Specifically, when the prefetching distance is equal to the L1 miss penalty, the worst-case execution time with instruction prefetching is minimized.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "7", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "hard real-time; instruction caches; instruction prefetching; Worst-case execution time analysis", } @Article{Aaraj:2008:ADH, author = "Najwa Aaraj and Anand Raghunathan and Niraj K. Jha", title = "Analysis and design of a hardware\slash software trusted platform module for embedded systems", journal = j-TECS, volume = "8", number = "1", pages = "8:1--8:??", month = dec, year = "2008", CODEN = "????", DOI = "https://doi.org/10.1145/1457246.1457254", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 6 14:36:01 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Trusted platforms have been proposed as a promising approach to enhance the security of general-purpose computing systems. However, for many resource-constrained embedded systems, the size and cost overheads of a separate Trusted Platform Module (TPM) chip are not acceptable. One alternative is to use a software-based TPM, which implements TPM functions using software that executes in a protected execution domain on the embedded processor itself. However, since many embedded systems have limited processing capabilities and are battery-powered, it is also important to ensure that the computational and energy requirements for SW-TPMs are acceptable.\par In this article, we perform an evaluation of the energy and execution time overheads for a SW-TPM implementation on a handheld appliance (Sharp Zaurus PDA). We characterize the execution time and energy required by each TPM command through actual measurements on the target platform. We observe that for most commands, overheads are primarily due to the use of 2,048-bit RSA operations that are performed within the SW-TPM. 
In order to alleviate SW-TPM overheads, we evaluate the use of Elliptic Curve Cryptography (ECC) as a replacement for the RSA algorithm specified in the Trusted Computing Group (TCG) standards. In addition, we also evaluate the overheads of using the SW-TPM in the context of various end applications, including trusted boot of the Linux operating system (OS), a secure VoIP client, and a secure Web browser. Furthermore, we analyze the computational workload involved in running SW-TPM commands using ECC. We then present a suite of hardware and software enhancements to accelerate these commands --- generic custom instructions and exploitation of parallel processing capabilities in multiprocessor systems-on-chip (SoCs). We report results of evaluating the proposed architectures on a commercial embedded processor (Xtensa from Tensilica). Through uniprocessor and multiprocessor optimizations, we could achieve speed-ups of up to 5.71X for individual TPM commands.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "8", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "Custom instructions; embedded systems; multiprocessor systems", } @Article{Suresh:2009:EEE, author = "Dinesh C. Suresh and Banit Agrawal and Jun Yang and Walid Najjar", title = "Energy-efficient encoding techniques for off-chip data buses", journal = j-TECS, volume = "8", number = "2", pages = "9:1--9:??", month = jan, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1457255.1457256", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Feb 5 19:15:05 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Reducing the power consumption of computing devices has gained a lot of attention recently. 
Many research works have focused on reducing power consumption in the off-chip buses as they consume a significant amount of total power. Since the bus power consumption is proportional to the switching activity, reducing the bus switching is an effective way to reduce bus power. While numerous techniques exist for reducing bus power in address buses, only a handful of techniques have been proposed for data-bus power reduction, where frequent value encoding (FVE) is the best existing scheme to reduce the transition activity on the data buses.\par In this article, we propose improved frequent value data bus-encoding techniques aimed at reducing more switching activity and, hence, power consumption. We propose three new schemes and five new variations to exploit bit-wise temporal and spatial locality in the data-bus values. Our techniques just use one external control signal and capture bit-wise locality to efficiently encode data values. For all the embedded and SPEC applications we tested, the overall average switching reduction is 53\% over unencoded data and 10\% more than the conventional FVE scheme.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "9", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "bus switching; encoding; internal capacitances; Low-power data buses", } @Article{Kejariwal:2009:ELL, author = "Arun Kejariwal and Alexander V. 
Veidenbaum and Alexandru Nicolau and Milind Girkar and Xinmin Tian and Hideki Saito", title = "On the exploitation of loop-level parallelism in embedded applications", journal = j-TECS, volume = "8", number = "2", pages = "10:1--10:??", month = jan, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1457255.1457257", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Feb 5 19:15:05 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Advances in the silicon technology have enabled increasing support for hardware parallelism in embedded processors. Vector units, multiple processors/cores, multithreading, special-purpose accelerators such as DSPs or cryptographic engines, or a combination of the above have appeared in a number of processors. They serve to address the increasing performance requirements of modern embedded applications. To what extent the available hardware parallelism can be exploited is directly dependent on the amount of parallelism inherent in the given application and the congruence between the granularity of hardware and application parallelism. This paper discusses how loop-level parallelism in embedded applications can be exploited in hardware and software. Specifically, it evaluates the efficacy of automatic loop parallelization and the performance potential of different types of parallelism, viz., true thread-level parallelism (TLP), speculative thread-level parallelism and vector parallelism, when executing loops. Additionally, it discusses the interaction between parallelization and vectorization. Applications from both the industry-standard EEMBC{\reg},$^1$ 1.1, EEMBC 2.0 and the academic MiBench embedded benchmark suites are analyzed using the Intel{\reg}$^2$ C compiler. 
The results show the performance that can be achieved today on real hardware and using a production compiler, provide upper bounds on the performance potential of the different types of thread-level parallelism, and point out a number of issues that need to be addressed to improve performance. The latter include parallelization of libraries such as libc and design of parallel algorithms to allow maximal exploitation of parallelism. The results also point to the need for developing new benchmark suites more suitable to parallel compilation and execution.\par $^1$ Other names and brands may be claimed as the property of others.\par $^2$ Intel is a trademark of Intel Corporation or its subsidiaries in the United States and other countries.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "10", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "libraries; Multi-cores; multithreading; parallel loops; programming models; system-on-chip (Soc); thread-level speculation; vectorization", } @Article{Hashemi:2009:TDS, author = "Matin Hashemi and Soheil Ghiasi", title = "Throughput-driven synthesis of embedded software for pipelined execution on multicore architectures", journal = j-TECS, volume = "8", number = "2", pages = "11:1--11:??", month = jan, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1457255.1457258", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Feb 5 19:15:05 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We present a methodology for pipelined software synthesis of streaming applications. First, we develop a versatile task assignment algorithm capable of optimizing realistically-arbitrary cost functions for two cores. The algorithm is exact (i.e., theoretically optimal) contrary to existing heuristics. 
Second, our approximation technique provides an adjustable knob to trade solution quality with algorithm runtime and memory. Third, we develop a recursive heuristic for more cores. FPGA-based emulated experiments validate our theoretical results. The exact algorithm yields $ 1.7 \times $ throughput improvement. The approximation method offers a range of tradeoff points (e.g., $ 3 \times $ faster with $ 20 \times $ less memory) while degrading the throughput only 1\% to 5\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "11", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "Embedded software; graph partitioning; multi-core hardware; streaming applications; task assignment", } @Article{Chattopadhyay:2009:PPA, author = "A. Chattopadhyay and H. Ishebabi and X. Chen and Z. Rakosi and K. Karuri and D. Kammler and R. Leupers and G. Ascheid and H. Meyr", title = "Pre- and postfabrication architecture exploration for partially reconfigurable {VLIW} processors", journal = j-TECS, volume = "8", number = "2", pages = "12:1--12:??", month = jan, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1457255.1457259", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Feb 5 19:15:05 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Modern application-specific instruction-set processors (ASIPs) face the daunting task of delivering high performance for a wide range of applications. For enhancing the performance, architectural features (e.g., pipelining, VLIW) are often employed in ASIPs, leading to high design complexity. Integrated ASIP design environments like template-based approaches and language-driven approaches provide an answer to this growing design complexity.
At the same time, increasing hardware design costs have motivated the processor designers to introduce high flexibility in the processor. Flexibility, in its most effective form, can be introduced to the ASIP by coupling a reconfigurable unit to the base processor. Due to its obvious benefits, several reconfigurable ASIPs (rASIPs) have been designed for years. This design paradigm gained momentum with the advent of coarse-grained FPGAs, where the lack of domain-specific performance common in general-purpose FPGAs are largely overcome by choosing application-dependent basic functional units. These rASIP designs lack a generic flow from high-level specification, resulting into intuitive design decisions and hard-to-retarget processor design tools. Although partial, template-based approaches for rASIP design is existent, a clear design methodology especially for the prefabrication architecture exploration is not present. In order to address this issue, a high-level specification and design methodology for partially reconfigurable VLIW processors is proposed in this article. To show the benefit of this approach a commercial VLIW processor is used as the base architecture and two domains of applications are studied for potential performance gain.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "12", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "ASIP; coarse-grained FPGA; VLIW", } @Article{Lin:2009:MAC, author = "Yi-Neng Lin and Ying-Dar Lin and Kuo-Kun Tseng and Yuan-Cheng Lai", title = "Modeling and analysis of core-centric network processors", journal = j-TECS, volume = "8", number = "2", pages = "13:1--13:??", month = jan, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1457255.1457260", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Feb 5 19:15:05 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Network processors can be categorized into two types, the coprocessors-centric model in which the data-plane is handled by coprocessors, and the core-centric model in which the core processes most of the data-plane packets yet offloading some tasks to coprocessors. While the former has been properly explored over various applications, researches regarding the latter remain limited. Based on the previous experience of prototyping the virtual private network (VPN) over the IXP425 network processor, this work aims to derive design implications for the core-centric model performing computational intensive applications. From system and IC vendors' perspectives, the continuous-time Markov chain and Petri net simulations are adopted to explore this architecture. Analytical results prove to be quite inline with those of the simulation and implementation. With subsequent investigation we find that appropriate process run lengths can improve the effective core utilization by 2.26 times, and by offloading the throughput boosts 7.5 times. The results also suggest single process programming since context switch overhead impacts considerably on the performance.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "13", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "core-centric; embedded system; modeling; Network processor; simulation", } @Article{Zhou:2009:CLC, author = "Xiangrong Zhou and Peter Petrov", title = "Cross-layer customization for rapid and low-cost task preemption in multitasked embedded systems", journal = j-TECS, volume = "8", number = "2", pages = "14:1--14:??", month = jan, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1457255.1457261", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Feb 5 19:15:05 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Preemptive multitasking is widely used in many low-cost and real-time embedded applications for its superior hardware utilization. The frequent and asynchronous context switches, however, require the preservation and restoration of the task state, thus resulting in a large number of memory transfer instructions. As a consequence, task responsiveness and application throughput can be significantly deteriorated. To address this problem we propose a cross-layer customization framework which through the close cooperation of compiler, OS, and hardware architecture achieves rapid and low-cost task switch. Application information extracted during compile-time regarding state liveness is exploited in order to preserve a minimal amount of task state on task preemption. We introduce two complementary techniques to implement the application-aware state preservation. The first technique utilizes compiler-generated custom routines which preserve/restore an extremely small live context at judiciously selected points in the application code. The second technique requires more sophisticated hardware support. It employs an OS-controlled register file mapping to achieve a rapid context switch. 
By mapping a small fraction of the register file in a single clock cycle, a context switch is achieved requiring no memory transfers for the majority of cases to preserve/restore the live state. The effect of aggressively replicated register files, where each task is given its own replica, is achieved with the hardware cost of only adding from 25\% to 50\% extra physical registers. Through the utilization of these novel mechanisms, a significant improvement on task response time is achieved as the context-switch cost is minimized.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "14", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Auerbach:2009:LLT, author = "Joshua Auerbach and David F. Bacon and Daniel Iercan and Christoph M. Kirsch and V. T. Rajan and Harald R{\"o}ck and Rainer Trummer", title = "Low-latency time-portable real-time programming with {Exotasks}", journal = j-TECS, volume = "8", number = "2", pages = "15:1--15:??", month = jan, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1457255.1457262", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Feb 5 19:15:05 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "{\em Exotasks\/} are a novel Java programming construct that achieve three important goals. They achieve low latency while allowing the fullest use of Java language features, compared to previous attempts to restrict the Java language for use in the submillisecond domain. They support pluggable schedulers, allowing easy implementation of new scheduling paradigms in a real-time Java system. They can achieve deterministic timing, even in the presence of other Java threads, and across changes of hardware and software platform. To achieve these goals, the program is divided into tasks with private heaps. 
Tasks may be strongly isolated, communicating only with each other and guaranteeing determinism, or weakly isolated, allowing some communication with the rest of the Java application. Scheduling of the tasks' execution, garbage collection, and value passing is accomplished by the pluggable scheduler. Schedulers that we have written employ logical execution time (LET) in association with strong isolation to achieve time portability. We have also built a quad-rotor model helicopter, the JAviator, which we use to evaluate our implementation of Exotasks in an experimental embedded version of IBM's J9 real-time virtual machine. Our experiments show that we are able to maintain very low scheduling jitter and deterministic behavior in the face of variations in both software load and hardware platform. We also show that Exotasks perform nearly as well as Eventrons on a benchmark audio application.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "15", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "Real-time scheduling; time portability; UAVs; virtual machine", } @Article{Ahn:2009:RCT, author = "Minwook Ahn and Yunheung Paek", title = "Register coalescing techniques for heterogeneous register architecture with copy sifting", journal = j-TECS, volume = "8", number = "2", pages = "16:1--16:??", month = jan, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1457255.1457263", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Feb 5 19:15:05 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Optimistic coalescing has been proven as an elegant and effective technique that provides better chances of safely coloring more registers in register allocation than other coalescing techniques. 
Its algorithm originally assumes homogeneous registers, which are all gathered in the same register file. Although this register architecture is still common in most general-purpose processors, embedded processors often contain heterogeneous registers, which are scattered in physically different register files dedicated for each dissimilar purpose and use. In this work, we show that optimistic coalescing is also useful for an embedded processor to better handle such heterogeneity of the register architecture, and developed a modified algorithm for optimal coalescing that helps a register allocator. In the experiment, an existing register allocator was able to achieve up to 13.0\% reduction in code size through our coalescing, and avoid many spills that would have been generated without our scheme.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "16", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "compiler; embedded processors; heterogeneous register architecture; Register allocation; register coalescing", } @Article{Mozumdar:2009:CSP, author = "Mohammad Mostafizur Rahman Mozumdar and Luciano Lavagno and Laura Vanzago", title = "A comparison of software platforms for wireless sensor networks: {MANTIS}, {TinyOS}, and {ZigBee}", journal = j-TECS, volume = "8", number = "2", pages = "17:1--17:??", month = jan, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1457255.1457264", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Feb 5 19:15:05 MST 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Wireless sensor networks are characterized by very tight code size and power constraints and by a lack of well-established standard software development platforms such as Posix. 
In this article, we present a comparative study between a few fairly different such platforms, namely MANTIS, TinyOS, and ZigBee, when considering them from the application developer's perspective, that is, by focusing mostly on functional aspects, rather than on performance or code size. In other words, we compare both the tasking model used by these platforms and the API libraries they offer. Sensor network applications are basically event based, so most of the software platforms are also built on considering event handling mechanism, however some use a more traditional thread based model. In this article, we consider implementations of a simple generic application in MANTIS, TinyOS, and the Ember ZigBee development framework, with the goal of depicting major differences between these platforms, and suggesting a programming style aimed at maximizing portability between them.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "17", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "application porting; software platform; Wireless sensor networks", } @Article{Unnikrishnan:2009:RMR, author = "P. Unnikrishnan and G. Chen and M. Kandemir and M. Karakoy and I. Kolcu", title = "Reducing memory requirements of resource-constrained applications", journal = j-TECS, volume = "8", number = "3", pages = "17:1--17:??", month = apr, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1509288.1509289", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Apr 21 16:29:24 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Embedded computing platforms are often resource constrained, requiring great design and implementation attention to memory-, power-, and heat-related parameters.
An important task for a compiler in such platforms is to simplify the process of developing applications for limited memory devices and resource-constrained clients. Focusing on array-intensive embedded applications to be executed on single CPU-based architectures, this work explores how loop-based compiler optimizations can be used for increasing memory location reuse. Our goal is to transform a given application in such a way that the resulting code has fewer cases (as compared to the original code), where the lifetimes of array elements overlap. The reduction in lifetimes of array elements can then be exploited by reusing memory locations as much as possible. Our experimental results indicate that the proposed strategy reduces data space requirements of 15 resource constrained applications by more than 40\%, on average. We also demonstrate how this strategy can be combined with data locality (cache behavior)--enhancing techniques so that a compiler can take advantage of both, that is, reduce data memory requirements and improve data locality at the same time.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "17", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "compilers; embedded system; lifetime; Memory; reuse", } @Article{Weng:2009:AMN, author = "Ning Weng and Tilman Wolf", title = "Analytic modeling of network processors for parallel workload mapping", journal = j-TECS, volume = "8", number = "3", pages = "18:1--18:??", month = apr, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1509288.1509290", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Apr 21 16:29:24 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Network processors are heterogeneous system-on-chip multiprocessors that are optimized to perform packet forwarding and processing tasks at Gigabit data rates. To meet the performance demands of increasing link speeds and complex network applications, network processors are implemented with several dozen embedded processor cores and hardware accelerators that run multiple packet processing applications in parallel. The parallel nature of the processing system makes it increasingly difficult for application developers to understand and manage resources and map processing tasks to the hardware. To address this problem, we present a methodology for profiling and analyzing network processor applications, mapping processing tasks to a generalized network processor architecture, and analytically determining the expected throughput performance. The key novelty of this work is not only the adaptation of application analysis and mapping algorithms to heterogeneous network processors, but also that the entire process can be automated and hidden from the application developer. 
Starting with the analysis of a uniprocessor implementation of the application, the process yields a mapping of the partitioned application that shows best performance for a given network processor system. The simplicity of the proposed randomized mapping algorithm allows the use of this methodology in network processor runtime systems where dynamic reallocation of tasks is necessary but processing power is limited. We present results that show the effectiveness of the analysis and mapping methodology as well as its application to design space exploration.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "18", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "Application profiling; embedded systems; multiprocessor scheduling; network processors", } @Article{Tseng:2009:FSA, author = "Kuo-Kun Tseng and Yuan-Cheng Lai and Ying-Dar Lin and Tsern-Huei Lee", title = "A fast scalable automaton-matching accelerator for embedded content processors", journal = j-TECS, volume = "8", number = "3", pages = "19:1--19:??", month = apr, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1509288.1509291", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Apr 21 16:29:24 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Home and office network gateways often employ a cost-effective embedded network processor to handle their network services. Such network gateways have received strong demand for applications dealing with intrusion detection, keyword blocking, antivirus and antispam. Accordingly, we were motivated to propose an appropriate fast scalable automaton-matching (FSAM) hardware to accelerate the embedded network processors. 
Although automaton matching algorithms are robust with deterministic matching time, there is still plenty of room for improving their average-case performance. FSAM employs novel prehash and root-index techniques to accelerate the matching for the nonroot states and the root state, respectively, in automaton-based hardware. The prehash approach uses some hashing functions to pretest the input substring for the nonroot states while the root-index approach handles multiple bytes in one single matching for the root state. Also, FSAM is applied in a prevalent automaton algorithm, Aho--Corasick (AC), which is often used in many content-filtering applications. When implemented in FPGA, FSAM can perform at the rate of 11.1Gbps with the pattern set of 32,634 bytes, demonstrating that our proposed approach can use a small logic circuit to achieve a competitive performance, although a larger memory is used. Furthermore, the amount of patterns in FSAM is not limited by the amount of internal circuits and memories. If the high-speed external memories are employed, FSAM can support up to 21,302 patterns while maintaining similar high performance.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "19", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "Aho--Corasick; automaton; Bloom filter; content filtering; String matching", } @Article{Reshadi:2009:HCS, author = "Mehrdad Reshadi and Prabhat Mishra and Nikil Dutt", title = "Hybrid-compiled simulation: an efficient technique for instruction-set architecture simulation", journal = j-TECS, volume = "8", number = "3", pages = "20:1--20:??", month = apr, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1509288.1509292", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Apr 21 16:29:24 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Instruction-set simulators are critical tools for the exploration and validation of new processor architectures. Due to the increasing complexity of architectures and time-to-market pressure, performance is the most important feature of an instruction-set simulator. Interpretive simulators are flexible but slow, whereas compiled simulators deliver speed at the cost of flexibility and compilation overhead. This article presents a hybrid instruction-set-compiled simulation (HISCS) technique for generation of fast instruction-set simulators that combines the benefit of both compiled and interpretive simulation. This article makes two important contributions: (i) it improves the interpretive simulation performance by applying compiled simulation at the instruction level using a novel template-customization technique to generate optimized decoded instructions during compile time; and (ii) it reduces the compile-time overhead by combining the benefits of both static and dynamic-compiled simulation. 
Our experimental results using two contemporary processors (ARM7 and SPARC) demonstrate an order-of-magnitude reduction in compilation time as well as a 70\% performance improvement, on average, over the best-known published result in instruction-set simulation.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "20", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "Compiled simulation; instruction set architecture; interpretive simulation; partial evaluation", } @Article{Nguyen:2009:MAE, author = "Nghi Nguyen and Angel Dominguez and Rajeev Barua", title = "Memory allocation for embedded systems with a compile-time-unknown scratch-pad size", journal = j-TECS, volume = "8", number = "3", pages = "21:1--21:??", month = apr, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1509288.1509293", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Apr 21 16:29:24 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article presents the first memory allocation scheme for embedded systems having a scratch-pad memory whose size is unknown at compile time. A scratch-pad memory (SPM) is a fast compiler-managed SRAM that replaces the hardware-managed cache. All existing memory allocation schemes for SPM require the SPM size to be known at compile time. Unfortunately, because of this constraint, the resulting executable is tied to that size of SPM and is not portable to other processor implementations having a different SPM size. Size-portable code is valuable when programs are downloaded during deployment either via a network or portable media. Code downloads are used for fixing bugs or for enhancing functionality. The presence of different SPM sizes in different devices is common because of the evolution in VLSI technology across years. 
The result is that SPM cannot be used in such situations with downloaded codes.\par To overcome this limitation, our work presents a compiler method whose resulting executable is portable across SPMs of any size. Our technique is to employ a customized installer software, which decides the SPM allocation just before the program's first run, since the SPM size can be discovered at that time. The installer then, based on the decided allocation, modifies the program executable accordingly. The resulting executable places frequently used objects in SPM, considering both code and data for placement. To keep the overhead low, much of the preprocessing for the allocation is done at compile time. Results show that our benchmarks average a 41\% speedup versus an all-DRAM allocation, while the optimal static allocation scheme, which knows the SPM size at compile time and is thus an unachievable upper-bound and is only slightly faster (45\% faster than all-DRAM). Results also show that the overhead from our customized installer averages about 1.5\% in code size, 2\% in runtime, and 3\% in compile time for our benchmarks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "21", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "compiler; data linked list; downloadable codes; embedded loading; embedded systems; Memory allocation; scratch-pad", } @Article{Lysecky:2009:DIM, author = "Roman Lysecky and Frank Vahid", title = "Design and implementation of a {MicroBlaze}-based warp processor", journal = j-TECS, volume = "8", number = "3", pages = "22:1--22:??", month = apr, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1509288.1509294", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Apr 21 16:29:24 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "While soft processor cores provided by FPGA vendors offer designers with increased flexibility, such processors typically incur penalties in performance and energy consumption compared to hard processor core alternatives. The recently developed technology of warp processing can help reduce those penalties. Warp processing is the dynamic and transparent transformation of critical software regions from microprocessor execution to much faster circuit execution on an FPGA. In this article, we describe an implementation of a warp processor on a Xilinx Virtex-II Pro and Spartan3 FPGAs incorporating one or more MicroBlaze soft processor cores. We further provide a detailed analysis of the energy overhead of dynamically partitioning an application's kernels to hardware executing within an FPGA. 
Considering an implementation that periodically partitions the executing application once every minute, a MicroBlaze-based warp processor implemented on a Spartan3 FPGA achieves average speedups of $ 5.8 \times $ and energy reductions of 49\% compared to the MicroBlaze soft processor core alone --- providing competitive performance and energy consumption compared to existing hard processor cores.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "22", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "configurable logic; dynamic optimization; FPGA; hardware/software partitioning; just-in-time (JIT) compilation; soft processor cores; Warp processors", } @Article{Bai:2009:MME, author = "Lan S. Bai and Lei Yang and Robert P. Dick", title = "{MEMMU}: {Memory} expansion for {MMU}-less embedded systems", journal = j-TECS, volume = "8", number = "3", pages = "23:1--23:??", month = apr, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1509288.1509295", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Apr 21 16:29:24 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Random access memory (RAM) is tightly constrained in the least expensive, lowest-power embedded systems such as sensor network nodes and portable consumer electronics. The most widely used sensor network nodes have only 4 to 10KB of RAM and do not contain memory management units (MMUs). It is difficult to implement complex applications under such tight memory constraints. Nonetheless, price and power-consumption constraints make it unlikely that increases in RAM in these systems will keep pace with the increasing memory requirements of applications.\par We propose the use of automated compile-time and runtime techniques to increase the amount of usable memory in MMU-less embedded systems.
The proposed techniques do not increase hardware cost, and require few or no changes to existing applications. We have developed runtime library routines and compiler transformations to control and optimize the automatic migration of application data between compressed and uncompressed memory regions, as well as a fast compression algorithm well suited to this application. These techniques were experimentally evaluated on Crossbow TelosB sensor network nodes running a number of data-collection and signal-processing applications. Our results indicate that available memory can be increased by up to 50\% with less than 10\% performance degradation for most benchmarks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "23", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "Data compression; embedded system; wireless sensor network", } @Article{Doblander:2009:NSF, author = "Andreas Doblander and Andreas Zoufal and Bernhard Rinner", title = "A novel software framework for embedded multiprocessor smart cameras", journal = j-TECS, volume = "8", number = "3", pages = "24:1--24:??", month = apr, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1509288.1509296", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Apr 21 16:29:24 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Distributed smart cameras (DSC) are an emerging technology for a broad range of important applications including smart rooms, surveillance, entertainment, tracking, and motion analysis. 
By having access to many views and through cooperation among the individual cameras, these DSCs have the potential to realize many more complex and challenging applications than single-camera systems.\par This article focuses on the system-level software required for efficient streaming applications on single smart cameras as well as on networks of DSCs. Embedded platforms with limited resources do not provide middleware services well known on general-purpose platforms. Our software framework supports transparent intra- and interprocessor communication while keeping the memory and computation overhead very low. The software framework is based on a publisher--subscriber architecture and provides mechanisms for dynamically loading and unloading software components as well as for graceful degradation in case of software- and hardware-related faults. The software framework has been completely implemented and tested on our embedded smart cameras consisting of an ARM-based network processor and several digital signal processors. Two case studies demonstrate the feasibility of our approach.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "24", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "distributed embedded systems; fault tolerance; publisher--subscriber; Smart cameras; video surveillance", } @Article{Li:2009:ELC, author = "Zhiyuan Li and Santosh Pande", title = "Editorial: {Languages}, compilers, and tools for embedded systems", journal = j-TECS, volume = "8", number = "4", pages = "25:1--25:??", month = jul, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1550987.1550988", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jul 23 12:32:49 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "25", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Murray:2009:CTI, author = "Alastair C. Murray and Richard V. Bennett and Bj{\"o}rn Franke and Nigel Topham", title = "Code transformation and instruction set extension", journal = j-TECS, volume = "8", number = "4", pages = "26:1--26:??", month = jul, year = "2009", CODEN = "????", DOI = "https://doi.org/10.1145/1550987.1550989", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jul 23 12:32:49 MDT 2009", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The demand for flexible embedded solutions and short time-to-market has led to the development of extensible processors that allow for customization through user-defined instruction set extensions (ISEs). These are usually identified from plain C sources. In this article, we propose a combined exploration of code transformations and ISE identification. The resulting performance of such a combination has been measured on two benchmark suites. Our results demonstrate that combined code transformations and ISEs can yield average performance improvements of 49\%. This outperforms ISEs when applied in isolation, and in extreme cases yields a speed-up of 2.85.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "26", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "ASIPs; compilers; Customizable processors; design space exploration; instruction set extension; source-level transformations", } @Article{Hu:2009:CAS, author = "Jie Hu and Feihui Li and Vijay Degalahal and Mahmut Kandemir and N. Vijaykrishnan and Mary J. 
Irwin",
  title = "Compiler-assisted soft error detection under performance and energy constraints in embedded systems",
  journal = j-TECS,
  volume = "8",
  number = "4",
  pages = "27:1--27:??",
  month = jul,
  year = "2009",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/1550987.1550990",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Thu Jul 23 12:32:49 MDT 2009",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract = "Soft errors induced by terrestrial radiation are becoming a significant concern in architectures designed in newer technologies. If left undetected, these errors can result in catastrophic consequences or costly maintenance problems in different embedded applications. In this article, we focus on utilizing the compiler's help in duplicating instructions for error detection in VLIW datapaths. The instruction duplication mechanism is further supported by a hardware enhancement for efficient result verification, which avoids the need of additional comparison instructions. In the proposed approach, the compiler determines the instruction schedule by balancing the permissible performance degradation and the energy constraint with the required degree of duplication. Our experimental results show that our algorithms allow the designer to perform trade-off analysis between performance, reliability, and energy consumption.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput.
Syst.",
  articleno = "27",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
  keywords = "compilers; Embedded systems; energy consumption; instruction duplication; reliability; soft errors",
}

@Article{Jafari:2009:EPR,
  author = "Roozbeh Jafari and Hassan Ghasemzadeh and Foad Dabiri and Ani Nahapetian and Majid Sarrafzadeh",
  title = "An efficient placement and routing technique for fault-tolerant distributed embedded computing",
  journal = j-TECS,
  volume = "8",
  number = "4",
  pages = "28:1--28:??",
  month = jul,
  year = "2009",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/1550987.1550991",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Thu Jul 23 12:32:49 MDT 2009",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract = "This article presents an efficient technique for placement and routing of sensors/actuators and processing units in a grid network. The driver application that we present is a medical jacket, which requires an extremely high level of robustness and fault tolerance. The power consumption of such jacket is another key technological constraint. Our proposed interconnection network is a mesh of wires. A jacket made of fabric and wires would be susceptible to accidental damage via tears. By modeling the tears, we evaluate the probability of having failures on every segment of wires in our mesh interconnection network. Then, we study two problems of placement and routing in the sensor networks such that the fault tolerance is maximized while the power consumption is minimized. We develop efficient integer linear programming (ILP) formulations to address these problems and perform both placement and routing, simultaneously. This ensures that the solution is a lower bound for both problems. We evaluate the effectiveness of our proposed techniques on a variety of benchmarks.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans.
Embed. Comput. Syst.",
  articleno = "28",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
  keywords = "Distributed embedded system; fault tolerance; placement; routing; sensor networks",
}

@Article{Lee:2009:CIA,
  author = "Edward A. Lee and Xiaojun Liu and Stephen Neuendorffer",
  title = "Classes and inheritance in actor-oriented design",
  journal = j-TECS,
  volume = "8",
  number = "4",
  pages = "29:1--29:??",
  month = jul,
  year = "2009",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/1550987.1550992",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Thu Jul 23 12:32:49 MDT 2009",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract = "Actor-oriented components emphasize concurrency and temporal semantics and are used for modeling and designing embedded software and hardware. Actors interact with one another through ports via a messaging schema that can follow any of several concurrent semantics. Domain-specific actor-oriented languages and frameworks are common (Simulink, LabVIEW, SystemC, etc.). However, they lack many modularity and abstraction mechanisms that programmers have become accustomed to in object-oriented components, such as classes, inheritance, interfaces, and polymorphism, except as inherited from the host language. This article shows a form that such mechanisms can take in actor-oriented components, gives a formal structure, and describes a prototype implementation. The mechanisms support actor-oriented class definitions, subclassing, inheritance, and overriding. The formal structure imposes structural constraints on a model (mainly the ``derivation invariant'') that lead to a policy to govern inheritance. In particular, the structural constraints permit a disciplined form of multiple inheritance with unambiguous inheritance and overriding behavior.
The policy is based formally on a generalized ultrametric space with some remarkable properties. In this space, inheritance is favored when actors are ``closer'' (in the generalized ultrametric), and we show that when inheritance can occur from multiple sources, one source is always unambiguously closer than the other.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput. Syst.",
  articleno = "29",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
  keywords = "Actors; components; generalized ultrametric; inheritance; interfaces; overriding; type systems",
}

@Article{Riccobene:2009:SCB,
  author = "Elvinia Riccobene and Patrizia Scandurra and Sara Bocchio and Alberto Rosti and Luigi Lavazza and Luigi Mantellini",
  title = "{SystemC\slash C-based} model-driven design for embedded systems",
  journal = j-TECS,
  volume = "8",
  number = "4",
  pages = "30:1--30:??",
  month = jul,
  year = "2009",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/1550987.1550993",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Thu Jul 23 12:32:49 MDT 2009",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract = "This article summarizes our effort, since 2004 up to the present time, for improving the current industrial Systems-on-Chip and Embedded Systems design by joining the capabilities of the unified modeling language (UML) and SystemC/C programming languages to operate at system-level. The proposed approach exploits the OMG model-driven architecture --- a framework for Model-driven Engineering --- capabilities of reducing abstract, coarse-grained and platform-independent system models to fine-grained and platform-specific models. We first defined a design methodology and a development flow for the hardware, based on a SystemC UML profile and encompassing different levels of abstraction.
We then included a multithread C UML profile for modelling software applications. Both SystemC/C profiles are consistent sets of modelling constructs designed to lift the programming features (both structural and behavioral) of the two coding languages to the UML modeling level. The new codesign flow is supported by an environment, which allows system modeling at higher abstraction levels (from a functional executable level to a register transfer level) and supports automatic code-generation/back-annotation from/to UML models.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput. Syst.",
  articleno = "30",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
  keywords = "C; ES; MDE; SoC; SystemC; UML",
}

@Article{Bini:2009:MCE,
  author = "Enrico Bini and Giorgio Buttazzo and Giuseppe Lipari",
  title = "Minimizing {CPU} energy in real-time systems with discrete speed management",
  journal = j-TECS,
  volume = "8",
  number = "4",
  pages = "31:1--31:??",
  month = jul,
  year = "2009",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/1550987.1550994",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Thu Jul 23 12:32:49 MDT 2009",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract = "This article presents a general framework to analyze and design embedded systems minimizing the energy consumption without violating timing requirements. A set of realistic assumptions is considered in the model in order to apply the results in practical real-time applications. The processor is assumed to have as a set of discrete operating modes, each characterized by speed and power consumption. The energy overhead and the transition delay incurred during mode switches are considered.
Task computation times are modeled with a part that scales with the speed and a part having a fixed duration, to take I/O operations into account.\par The proposed method allows to compute the optimal sequence of voltage/speed changes that approximates the minimum continuous speed, which guarantees the feasibility of a given set of real-time tasks, without violating the deadline constraints. The analysis is performed both under fixed and dynamic priority assignments.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput. Syst.",
  articleno = "31",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
  keywords = "CPU energy; Real-time systems",
}

@Article{Koo:2009:FTG,
  author = "Heon-Mo Koo and Prabhat Mishra",
  title = "Functional test generation using design and property decomposition techniques",
  journal = j-TECS,
  volume = "8",
  number = "4",
  pages = "32:1--32:??",
  month = jul,
  year = "2009",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/1550987.1550995",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Thu Jul 23 12:32:49 MDT 2009",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract = "Functional verification of microprocessors is one of the most complex and expensive tasks in the current system-on-chip design methodology. Simulation using functional test vectors is the most widely used form of processor validation. A significant bottleneck in the validation of such systems is the lack of automated techniques for directed test generation. While existing model checking--based approaches have proposed several promising ideas for automated test generation, many challenges remain in applying them to industrial microprocessors. The time and resources required for test generation using existing model checking--based techniques can be prohibitively large.
This article presents an efficient test generation technique using decompositional model checking. The contribution of the article is the development of both property and design decomposition procedures for efficient test generation of pipelined processors. Our experimental results using a multi-issue MIPS processor and an industrial processor based on Power Architecture\TM{} Technology demonstrate several orders-of-magnitude reduction in validation effort by drastically reducing both test generation time and test program length.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput. Syst.",
  articleno = "32",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
  keywords = "design decomposition; functional validation; Model checking; pipelined processor; property decomposition; test generation",
}

@Article{Plaks:2009:GECa,
  author = "Toomas P. Plaks and Neil Bergmann and Bernard Pottier",
  title = "Guest editorial {CAPA'08} configurable computing: {Configuring} algorithms, processes, and architecture issue {I}: {Configuring} algorithms and processes",
  journal = j-TECS,
  volume = "9",
  number = "1",
  pages = "1:1--1:??",
  month = oct,
  year = "2009",
  CODEN = "????",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Mon Mar 15 18:40:57 MDT 2010",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput. Syst.",
  articleno = "1",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Ferri:2009:RIF,
  author = "B. H. Ferri and A. A.
Ferri",
  title = "Reconfiguration of {IIR} filters in response to computer resource availability",
  journal = j-TECS,
  volume = "9",
  number = "1",
  pages = "2:1--2:??",
  month = oct,
  year = "2009",
  CODEN = "????",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Mon Mar 15 18:40:57 MDT 2010",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput. Syst.",
  articleno = "2",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Wang:2009:TTD,
  author = "Xiaojun Wang and Miriam Leeser",
  title = "A truly two-dimensional systolic array {FPGA} implementation of {QR} decomposition",
  journal = j-TECS,
  volume = "9",
  number = "1",
  pages = "3:1--3:??",
  month = oct,
  year = "2009",
  CODEN = "????",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Mon Mar 15 18:40:57 MDT 2010",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput. Syst.",
  articleno = "3",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{DoCarmoLucas:2009:ADF,
  author = "Amilcar {Do Carmo Lucas} and Henning Sahlbach and Sean Whitty and Sven Heithecker and Rolf Ernst",
  title = "Application development with the {FlexWAFE} real-time stream processing architecture for {FPGAs}",
  journal = j-TECS,
  volume = "9",
  number = "1",
  pages = "4:1--4:??",
  month = oct,
  year = "2009",
  CODEN = "????",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Mon Mar 15 18:40:57 MDT 2010",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput.
Syst.",
  articleno = "4",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Nahapetian:2009:AAS,
  author = "Ani Nahapetian and Philip Brisk and Soheil Ghiasi and Majid Sarrafzadeh",
  title = "An approximation algorithm for scheduling on heterogeneous reconfigurable resources",
  journal = j-TECS,
  volume = "9",
  number = "1",
  pages = "5:1--5:??",
  month = oct,
  year = "2009",
  CODEN = "????",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Mon Mar 15 18:40:57 MDT 2010",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput. Syst.",
  articleno = "5",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Patterson:2009:SMB,
  author = "C. Patterson and P. Athanas and M. Shelburne and J. Bowen and J. Sur{\'\i}s and T. Dunham and J. Rice",
  title = "Slotless module-based reconfiguration of embedded {FPGAs}",
  journal = j-TECS,
  volume = "9",
  number = "1",
  pages = "6:1--6:??",
  month = oct,
  year = "2009",
  CODEN = "????",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Mon Mar 15 18:40:57 MDT 2010",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput.
Syst.",
  articleno = "6",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Lloyd:2009:PSN,
  author = "Scott Lloyd and Quinn Snell",
  title = "A packet-switched network architecture for reconfigurable computing",
  journal = j-TECS,
  volume = "9",
  number = "1",
  pages = "7:1--7:??",
  month = oct,
  year = "2009",
  CODEN = "????",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Mon Mar 15 18:40:57 MDT 2010",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput. Syst.",
  articleno = "7",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Lubbers:2009:RMP,
  author = "Enno L{\"u}bbers and Marco Platzner",
  title = "{ReconOS}: {Multithreaded} programming for reconfigurable computers",
  journal = j-TECS,
  volume = "9",
  number = "1",
  pages = "8:1--8:??",
  month = oct,
  year = "2009",
  CODEN = "????",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Mon Mar 15 18:40:57 MDT 2010",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput. Syst.",
  articleno = "8",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Huang:2009:SFB,
  author = "Jian Huang and Matthew Parris and Jooheung Lee and Ronald F.
Demara",
  title = "Scalable {FPGA}-based architecture for {DCT} computation using dynamic partial reconfiguration",
  journal = j-TECS,
  volume = "9",
  number = "1",
  pages = "9:1--9:??",
  month = oct,
  year = "2009",
  CODEN = "????",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Mon Mar 15 18:40:57 MDT 2010",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput. Syst.",
  articleno = "9",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Plaks:2009:GECb,
  author = "Toomas P. Plaks and Neil Bergmann and Bernard Pottier",
  title = "Guest editorial {CAPA'08 Configurable} computing: {Configuring} algorithms, processes, and architecture {Issue II}: {Configuring} hardware architecture",
  journal = j-TECS,
  volume = "9",
  number = "2",
  pages = "10:1--10:??",
  month = oct,
  year = "2009",
  CODEN = "????",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Mon Mar 15 18:41:00 MDT 2010",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput. Syst.",
  articleno = "10",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Alle:2009:RRR,
  author = "Mythri Alle and Keshavan Varadarajan and Alexander Fell and Ramesh Reddy C. and Nimmy Joseph and Saptarsi Das and Prasenjit Biswas and Jugantor Chetia and Adarsh Rao and S. K.
Nandy and Ranjani Narayan",
  title = "{REDEFINE}: {Runtime} reconfigurable polymorphic {ASIC}",
  journal = j-TECS,
  volume = "9",
  number = "2",
  pages = "11:1--11:??",
  month = oct,
  year = "2009",
  CODEN = "????",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Mon Mar 15 18:41:00 MDT 2010",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput. Syst.",
  articleno = "11",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Banerjee:2009:FPU,
  author = "Pritha Banerjee and Susmita Sur-Kolay and Arijit Bishnu and Sandip Das and Subhas C. Nandy and Subhasis Bhattacharjee",
  title = "{FPGA} placement using space-filling curves: {Theory} meets practice",
  journal = j-TECS,
  volume = "9",
  number = "2",
  pages = "12:1--12:??",
  month = oct,
  year = "2009",
  CODEN = "????",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Mon Mar 15 18:41:00 MDT 2010",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput. Syst.",
  articleno = "12",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Beckett:2009:PSM,
  author = "Paul Beckett",
  title = "Power scalability in a mesh-connected reconfigurable architecture",
  journal = j-TECS,
  volume = "9",
  number = "2",
  pages = "13:1--13:??",
  month = oct,
  year = "2009",
  CODEN = "????",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Mon Mar 15 18:41:00 MDT 2010",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput.
Syst.",
  articleno = "13",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Zhao:2009:STT,
  author = "Weisheng Zhao and Eric Belhaire and Claude Chappert and Pascale Mazoyer",
  title = "Spin transfer torque {(STT)-MRAM--based} runtime reconfiguration {FPGA} circuit",
  journal = j-TECS,
  volume = "9",
  number = "2",
  pages = "14:1--14:??",
  month = oct,
  year = "2009",
  CODEN = "????",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Mon Mar 15 18:41:00 MDT 2010",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput. Syst.",
  articleno = "14",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Lee:2010:CPV,
  author = "Hyung Sun Lee and Byung Kook Kim",
  title = "Coscheduling of processor voltage and control task period for energy-efficient control systems",
  journal = j-TECS,
  volume = "9",
  number = "3",
  pages = "15:1--15:??",
  month = feb,
  year = "2010",
  CODEN = "????",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Mon Mar 15 18:41:02 MDT 2010",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput.
Syst.",
  articleno = "15",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Reddy:2010:CPE,
  author = "Rakesh Reddy and Peter Petrov",
  title = "Cache partitioning for energy-efficient and interference-free embedded multitasking",
  journal = j-TECS,
  volume = "9",
  number = "3",
  pages = "16:1--16:??",
  month = feb,
  year = "2010",
  CODEN = "????",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Mon Mar 15 18:41:02 MDT 2010",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput. Syst.",
  articleno = "16",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Geelen:2010:MES,
  author = "Bert Geelen and Vissarion Ferentinos and Francky Catthoor and Gauthier Lafruit and Diederik Verkest and Rudy Lauwereins and Thanos Stouraitis",
  title = "Modeling and exploiting spatial locality trade-offs in wavelet-based applications under varying resource requirements",
  journal = j-TECS,
  volume = "9",
  number = "3",
  pages = "17:1--17:??",
  month = feb,
  year = "2010",
  CODEN = "????",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Mon Mar 15 18:41:02 MDT 2010",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput. Syst.",
  articleno = "17",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Bueno:2010:ORA,
  author = "David Bueno and Chris Conger and Alan D.
George",
  title = "Optimizing {RapidIO} architectures for onboard processing",
  journal = j-TECS,
  volume = "9",
  number = "3",
  pages = "18:1--18:??",
  month = feb,
  year = "2010",
  CODEN = "????",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Mon Mar 15 18:41:02 MDT 2010",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput. Syst.",
  articleno = "18",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Inoue:2010:RSC,
  author = "Hiroaki Inoue and Junji Sakai and Masato Edahiro",
  title = "A robust seamless communication architecture for next-generation mobile terminals on multi-{CPU} {SoCs}",
  journal = j-TECS,
  volume = "9",
  number = "3",
  pages = "19:1--19:??",
  month = feb,
  year = "2010",
  CODEN = "????",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Mon Mar 15 18:41:02 MDT 2010",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput. Syst.",
  articleno = "19",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Manzanares:2010:CER,
  author = "Adam Manzanares and Xiaojun Ruan and Shu Yin and Xiao Qin and Adam Roth and Mais Najim",
  title = "Conserving energy in real-time storage systems with {I/O} burstiness",
  journal = j-TECS,
  volume = "9",
  number = "3",
  pages = "20:1--20:??",
  month = feb,
  year = "2010",
  CODEN = "????",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Mon Mar 15 18:41:02 MDT 2010",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput.
Syst.",
  articleno = "20",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Courbot:2010:EBD,
  author = "Alexandre Courbot and Gilles Grimaud and Jean-Jacques Vandewalle",
  title = "Efficient off-board deployment and customization of virtual machine-based embedded systems",
  journal = j-TECS,
  volume = "9",
  number = "3",
  pages = "21:1--21:??",
  month = feb,
  year = "2010",
  CODEN = "????",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Mon Mar 15 18:41:02 MDT 2010",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput. Syst.",
  articleno = "21",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Xue:2010:IRP,
  author = "Chun Jason Xue and Jingtong Hu and Zili Shao and Edwin Sha",
  title = "Iterational retiming with partitioning: {Loop} scheduling with complete memory latency hiding",
  journal = j-TECS,
  volume = "9",
  number = "3",
  pages = "22:1--22:??",
  month = feb,
  year = "2010",
  CODEN = "????",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Mon Mar 15 18:41:02 MDT 2010",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput. Syst.",
  articleno = "22",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Cho:2010:LFS,
  author = "Hyeonjoong Cho and Binoy Ravindran and E.
Douglas Jensen",
  title = "Lock-free synchronization for dynamic embedded real-time systems",
  journal = j-TECS,
  volume = "9",
  number = "3",
  pages = "23:1--23:??",
  month = feb,
  year = "2010",
  CODEN = "????",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Mon Mar 15 18:41:02 MDT 2010",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput. Syst.",
  articleno = "23",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Musoll:2010:CEL,
  author = "Enric Musoll",
  title = "A cost-effective load-balancing policy for tile-based, massive multi-core packet processors",
  journal = j-TECS,
  volume = "9",
  number = "3",
  pages = "24:1--24:??",
  month = feb,
  year = "2010",
  CODEN = "????",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Mon Mar 15 18:41:02 MDT 2010",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput. Syst.",
  articleno = "24",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Guang:2010:HAM,
  author = "Liang Guang and Ethiopia Nigussie and Pekka Rantala and Jouni Isoaho and Hannu Tenhunen",
  title = "Hierarchical agent monitoring design approach towards self-aware parallel systems-on-chip",
  journal = j-TECS,
  volume = "9",
  number = "3",
  pages = "25:1--25:??",
  month = feb,
  year = "2010",
  CODEN = "????",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Mon Mar 15 18:41:02 MDT 2010",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput.
Syst.",
  articleno = "25",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{McLoughlin:2010:RTR,
  author = "Ian Vince McLoughlin and Timo Rolf Bretschneider",
  title = "Reliability through redundant parallelism for micro-satellite computing",
  journal = j-TECS,
  volume = "9",
  number = "3",
  pages = "26:1--26:??",
  month = feb,
  year = "2010",
  CODEN = "????",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Mon Mar 15 18:41:02 MDT 2010",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput. Syst.",
  articleno = "26",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Yang:2010:OMC,
  author = "Lei Yang and Robert P. Dick and Haris Lekatsas and Srimat Chakradhar",
  title = "Online memory compression for embedded systems",
  journal = j-TECS,
  volume = "9",
  number = "3",
  pages = "27:1--27:??",
  month = feb,
  year = "2010",
  CODEN = "????",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Mon Mar 15 18:41:02 MDT 2010",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput.
Syst.",
  articleno = "27",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Cesana:2010:MBM,
  author = "Ulpian Cesana and Zhen He",
  title = "Multi-buffer manager: {Energy-efficient} buffer manager for databases on flash memory",
  journal = j-TECS,
  volume = "9",
  number = "3",
  pages = "28:1--28:??",
  month = feb,
  year = "2010",
  CODEN = "????",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Mon Mar 15 18:41:02 MDT 2010",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput. Syst.",
  articleno = "28",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Tichy:2010:GAF,
  author = "Milan Tichy and Jan Schier and David Gregg",
  title = "{GSFAP} adaptive filtering using log arithmetic for resource-constrained embedded systems",
  journal = j-TECS,
  volume = "9",
  number = "3",
  pages = "29:1--29:??",
  month = feb,
  year = "2010",
  CODEN = "????",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Mon Mar 15 18:41:02 MDT 2010",
  bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput. Syst.",
  articleno = "29",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Yang:2010:HPO,
  author = "Lei Yang and Robert P.
Dick and Haris Lekatsas and Srimat Chakradhar", title = "High-performance operating system controlled online memory compression", journal = j-TECS, volume = "9", number = "4", pages = "30:1--30:??", month = mar, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1721695.1721696", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Apr 2 17:12:34 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Online memory compression is a technology that increases the amount of memory available to applications by dynamically compressing and decompressing their working datasets on demand. It has proven extremely useful in embedded systems with tight physical RAM constraints. The technology can be used to increase functionality, reduce size, and reduce cost, without modifying applications or hardware. This article presents a new software-based online memory compression algorithm for embedded systems. In comparison with the best algorithms used in online memory compression, our new algorithm has a competitive compression ratio but is twice as fast. In addition, we describe several practical problems encountered in developing an online memory compression infrastructure and present solutions. We present a method of adaptively managing the uncompressed and compressed memory regions during application execution. This memory management scheme adapts to the predicted memory requirements of applications. It permits efficient compression for a wide range of applications. We have evaluated our techniques on a portable embedded device and have found that the memory available to applications can be increased by $ 2.5 \times $ with negligible performance and power consumption penalties, and with no changes to hardware or applications. Our techniques allow existing applications to execute with less physical memory.
They also allow applications with larger working datasets to execute on unchanged embedded system hardware, thereby increasing functionality.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "30", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "compression; Embedded system; memory", } @Article{Wu:2010:SAF, author = "Chin-Hsien Wu", title = "A self-adjusting flash translation layer for resource-limited embedded systems", journal = j-TECS, volume = "9", number = "4", pages = "31:1--31:??", month = mar, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1721695.1721697", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Apr 2 17:12:34 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The capacity of flash memory storage systems has been growing at a speed similar to many other storage systems. In order to properly manage the product cost, vendors face serious challenges in resource-limited embedded systems. In this article, a self-adjusting flash translation layer is proposed with low memory requirements. The objective of the design is to provide efficient address mapping and low garbage collection overhead, while controlling main memory usage of the flash translation layer. The capability of the design is evaluated over realistic workloads and benchmarks. System performance is also guaranteed under low memory requirements.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "31", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "embedded systems; Flash memory; flash translation layer; storage systems", } @Article{Irturk:2010:GAG, author = "Ali Irturk and Bridget Benson and Shahnam Mirzaei and Ryan Kastner", title = "{GUSTO}: an automatic generation and optimization tool for matrix inversion architectures", journal = j-TECS, volume = "9", number = "4", pages = "32:1--32:??", month = mar, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1721695.1721698", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Apr 2 17:12:34 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Matrix inversion is a common function found in many algorithms used in wireless communication systems. As FPGAs become an increasingly attractive platform for wireless communication, it is important to understand the trade-offs in designing a matrix inversion core on an FPGA. This article describes a matrix inversion core generator tool, GUSTO, that we developed to ease the design space exploration across different matrix inversion architectures. GUSTO is the first tool of its kind to provide automatic generation of a variety of general-purpose matrix inversion architectures with different parameterization options. GUSTO also provides an optimized application-specific architecture with an average of 59\% area decrease and 3X throughput increase over its general-purpose architecture. The optimized architectures generated by GUSTO provide comparable results to published matrix inversion architecture implementations, but offer the advantage of providing the designer the ability to study the trade-offs between architectures with different design parameters.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "32", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "design space exploration; Field programmable gate arrays (FPGAs); matrix inversion", } @Article{Yu:2010:FSB, author = "Yue Yu and Shangping Ren and Ophir Frieder", title = "Feasibility of semiring-based timing constraints", journal = j-TECS, volume = "9", number = "4", pages = "33:1--33:??", month = mar, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1721695.1721699", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Apr 2 17:12:34 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Real-time and embedded applications often involve different types of timing constraints, such as precedence constraints and real-time constraints. As real-time and embedded applications further advance, new timing constraint types are emerging as well. Recent research on interval-based timing constraints is an example. Thus, it is important to have a uniformed timing constraint representation so that a generalized approach can be developed to analyze the variant constraint types.\par A semiring-based timing constraint model is introduced to generalize the representations of different constraint types. Under this model, we develop an algorithm to check the satisfaction feasibility for a given set of semiring-based timing constraints. This algorithm provides better performance in the average case as compared to applying the Bellman-Ford algorithm directly on the constraint set.\par In addition, for a set of feasible semiring-based timing constraints, event occurrence points that satisfy the constraint set form a (hyperdimension) feasible region. For the given two sets of timing constraints, we develop a necessary and sufficient condition to testify whether the two constraint sets' feasible regions have an inclusion relation. 
If one feasible region is included in the other, we know that the real-time event occurrences that satisfy the included constraint set will necessarily satisfy the including set.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "33", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "timing constraint feasibility analysis; Timing constraints", } @Article{Tahaee:2010:PAP, author = "Seyed-Abdoreza Tahaee and Amir Hossein Jahangir", title = "A polynomial algorithm for partitioning problems", journal = j-TECS, volume = "9", number = "4", pages = "34:1--34:??", month = mar, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1721695.1721700", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Apr 2 17:12:34 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article takes a theoretical approach to focus on the algorithmic properties of hardware/software partitioning. It proposes a method with polynomial complexity to find the global optimum of an NP-hard model partitioning problem for 75\% of occurrences under some practical conditions. The global optimum is approached with a lower bound distance for the remaining 25\%. Furthermore, this approach ensures finding the 2-approximate of the global optimum partition in 97\% of instances where technical assumptions exist. The strategy is based on intelligently changing the parameters of the polynomial model of the partitioning problem to force it to produce (or approach) the exact solution to the NP-hard model.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "34", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "hardware/software codesign; maximum flow minimum cut problem; NP-hard problems; Partitioning problem", } @Article{Peng:2010:OWZ, author = "Huan-Kai Peng and Youn-Long Lin", title = "An optimal warning-zone-length assignment algorithm for real-time and multiple-{QoS} on-chip bus arbitration", journal = j-TECS, volume = "9", number = "4", pages = "35:1--35:??", month = mar, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1721695.1721701", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Apr 2 17:12:34 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In an advanced System-on-Chip (SoC) for real-time applications, the arbiter of its on-chip communication subsystem needs to support multiple QoS criteria while providing a hard real-time guarantee. To fulfill both objectives, the arbitration algorithm must dynamically switch between NonReal-Time (NRT) and Real-Time (RT) modes such that use of the RT mode is minimized to best accommodate the overall QoS criteria. In this article, we define a model for this problem, and propose optimal solutions to its associated problems with static and dynamic warning-zone-length assignment. Compared with previous works, the proposed approach enables a bus arbiter to use much less RT mode in providing a Real-Time (RT) guarantee and, therefore, gives the arbiter more opportunity to employ non-RT modes to achieve better overall QoS. Experimental results show that the proposed approach reduces RT mode usage by as much as 37.1\%. Moreover, that reduction in RT mode usage helps cut the execution time by 27.0\% when applying our approach to an industrial DRAM controller. 
Another case study on an AMBA-compliant ultra-high-resolution H.264 decoder IP shows that the proposed approach reduces RT mode usage by 26.4\%, which leads to an average reduction of 10.4\% in decoding time. Finally, when implementing a 16 master arbiter, it costs only 6.9K and 9.5K gates of overhead using the proposed static and dynamic approach, respectively. Therefore, the proposed approach is suitable for real-time SoC applications.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "35", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "on-chip communication; QoS; real-time scheduling; System-on-Chip", } @Article{Schlich:2010:MCS, author = "Bastian Schlich", title = "Model checking of software for microcontrollers", journal = j-TECS, volume = "9", number = "4", pages = "36:1--36:??", month = mar, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1721695.1721702", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Apr 2 17:12:34 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The interest of industries in model checking software for microcontrollers is increasing. However, there are currently no appropriate tools that can be applied by embedded systems developers for the direct verification of software for microcontrollers without the need for manual modeling. This article describes a new approach to model checking software for microcontrollers, which verifies the assembly code of the software. The state space is built using a tailored simulator, which abstracts from time, handles nondeterminism, and creates an overapproximation of the behavior shown by the real microcontroller. Within this simulator, we apply abstraction techniques to tackle the state-explosion problem. 
In our approach, we combine different formal methods, namely, model checking, static analysis, and abstract interpretation. We also combine explicit and symbolic model checking techniques. This article presents a case study using several programs to demonstrate the efficiency of the applied abstraction techniques and to show the applicability of this approach.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "36", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "Assembly code; formal verification; microcontroller; model checking; static analysis", } @Article{Bombieri:2010:SND, author = "Nicola Bombieri and Franco Fummi and Davide Quaglia", title = "System\slash network design-space exploration based on {TLM} for networked embedded systems", journal = j-TECS, volume = "9", number = "4", pages = "37:1--37:??", month = mar, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1721695.1721703", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Apr 2 17:12:34 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article presents a methodology for the design of Networked Embedded Systems (NESs), which extends Transaction Level Modeling (TLM) to perform system/network design-space exploration. As a result, a new design dimension is added to the traditional TLM refinement process to represent network configuration alternatives. Each network configuration can be used to drive both architecture exploration and system validation after each refinement step. A system/network simulation taxonomy is investigated aiming at precisely identifying the role of cosimulation in system/network design-space exploration. Furthermore, a general criterion to map functionalities to system and network models is presented. 
As a case study, the proposed methodology is applied to the design of a Voice-over-IP client.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "37", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "networked embedded systems; Transaction level modeling", } @Article{Lin:2010:SSA, author = "Chang Hong Lin and Marilyn Wolf and Xenofon Koutsoukos and Sandeep Neema and Janos Sztipanovits", title = "System and software architectures of distributed smart cameras", journal = j-TECS, volume = "9", number = "4", pages = "38:1--38:??", month = mar, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1721695.1721704", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Apr 2 17:12:34 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this article, we describe a distributed, peer-to-peer gesture recognition system along with a software architecture modeling technique and authority control protocol for ubiquitous cameras. This system performs gesture recognition in real time by combining imagery from multiple cameras without using a central server. We propose a system architecture that uses a network of inexpensive cameras to perform in-network video processing. A methodology for transforming well-designed single-node algorithm to distributed system is also proposed. Applications for ubiquitous cameras can be modeled as the composition of a finite-state machine of the system, functional services, and middleware. A service-oriented software architecture is proposed to dynamically reconfigure services when system state changes. By exchanging data and control messages between neighboring sensors, each node can maintain broader view of the environment with integrated video-processing results.
Our prototype system is built on Windows machines, and uses standard video cameras as sensors and local network as a communication channel.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "38", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "Distributed cameras; smart camera; software architecture", } @Article{Zhou:2010:MMS, author = "Gang Zhou and Yafeng Wu and Ting Yan and Tian He and Chengdu Huang and John A. Stankovic and Tarek F. Abdelzaher", title = "A multifrequency {MAC} specially designed for wireless sensor network applications", journal = j-TECS, volume = "9", number = "4", pages = "39:1--39:??", month = mar, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1721695.1721705", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Apr 2 17:12:34 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Multifrequency media access control has been well understood in general wireless ad hoc networks, while in wireless sensor networks, researchers still focus on single frequency solutions. In wireless sensor networks, each device is typically equipped with a single radio transceiver and applications adopt much smaller packet sizes compared to those in general wireless ad hoc networks. Hence, the multifrequency MAC protocols proposed for general wireless ad hoc networks are not suitable for wireless sensor network applications, which we further demonstrate through our simulation experiments. In this article, we propose MMSN, which takes advantage of multifrequency availability while, at the same time, takes into consideration the restrictions of wireless sensor networks. In MMSN, four frequency assignment options are provided to meet different application requirements. A scalable media access is designed with efficient broadcast support. 
Also, an optimal nonuniform back-off algorithm is derived and its lightweight approximation is implemented in MMSN, which significantly reduces congestion in the time synchronized media access design. Through extensive experiments, MMSN exhibits the prominent ability to utilize parallel transmissions among neighboring nodes. When multiple physical frequencies are available, it also achieves increased energy efficiency, demonstrating the ability to work against radio interference and the tolerance to a wide range of measured time synchronization errors.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "39", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "media access control; multi-channel; radio interference; time synchronization; Wireless sensor networks", } @Article{Jung:2010:SFS, author = "Dawoon Jung and Jeong-Uk Kang and Heeseung Jo and Jin-Soo Kim and Joonwon Lee", title = "Superblock {FTL}: a superblock-based {Flash Translation Layer} with a hybrid address translation scheme", journal = j-TECS, volume = "9", number = "4", pages = "40:1--40:??", month = mar, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1721695.1721706", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Apr 2 17:12:34 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In NAND flash-based storage systems, an intermediate software layer called a Flash Translation Layer (FTL) is usually employed to hide the erase-before-write characteristics of NAND flash memory. We propose a novel superblock-based FTL scheme, which combines a set of adjacent logical blocks into a superblock. In the proposed Superblock FTL, superblocks are mapped at coarse granularity, while pages inside the superblock are mapped freely at fine granularity to any location in several physical blocks. 
To reduce extra storage and flash memory operations, the fine-grain mapping information is stored in the spare area of NAND flash memory. This hybrid address translation scheme has the flexibility provided by fine-grain address translation, while reducing the memory overhead to the level of coarse-grain address translation. Our experimental results show that the proposed FTL scheme significantly outperforms previous block-mapped FTL schemes with roughly the same memory overhead.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "40", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "FTL; hybrid address translation; NAND flash memory; storage system", } @Article{Klues:2010:LLD, author = "Kevin Klues and Guoliang Xing and Chenyang Lu", title = "Link layer driver architecture for unified radio power management in wireless sensor networks", journal = j-TECS, volume = "9", number = "4", pages = "41:1--41:??", month = mar, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1721695.1721707", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Apr 2 17:12:34 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Wireless Sensor Networks (WSNs) represent a new generation of networked embedded systems that must achieve long lifetimes on scarce amounts of energy. Since radio communication accounts for the primary source of power drain in these networks, a large number of different radio power management protocols have been proposed. However, the lack of operating system support for flexibly integrating them with a diverse set of applications and network platforms has made them difficult to use. This article focuses on providing link layer support toward realizing a unified power management architecture (UPMA) for WSNs. 
In contrast to existing monolithic approaches, we provide (i) a set of standard interfaces that separate link layer power management protocols from common MAC level functionality, (ii) an architectural framework that allows applications to easily swap out different power-management protocols depending on its needs, and (iii) a mechanism for coordinating multiple applications with different power management requirements. We have implemented our approach on both the Mica2 and Telosb radio drivers in TinyOS-2.0, the second generation of the de facto standard operating system for WSNs. Microbenchmark results show that our approach can coordinate the power-management requirements of multiple applications in a platform independent fashion while incurring negligible overhead.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "41", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "architecture; framework; radio power management; Wireless sensor networks", } @Article{Lee:2010:IHM, author = "Jupyung Lee and Kyu Ho Park", title = "Interrupt handler migration and direct interrupt scheduling for rapid scheduling of interrupt-driven tasks", journal = j-TECS, volume = "9", number = "4", pages = "42:1--42:??", month = mar, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1721695.1721708", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Apr 2 17:12:34 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this article, we propose two techniques that aim to minimize the scheduling latency of high-priority interrupt-driven tasks, named the Interrupt Handler Migration (IHM) and Direct Interrupt Scheduling (DIS). 
The IHM allows the interrupt handler to be migrated from the interrupt handler thread to the corresponding target process so that additional context switch can be avoided and the cache hit ratio with respect to the data generated by the interrupt handler can be improved. In addition, the DIS allows the shortest path reserved for urgent interrupt-process pairs to be laid between the interrupt arrival and target process by dividing a series of interrupt-driven operations into nondeferrable and deferrable operations. Both the IHM and DIS can be combined in a natural way and can operate concurrently. These techniques can be applied to all kinds of interrupt handlers with no modification to them. The proposed techniques not only reduce the scheduling latency, but also resolve the interrupt-driven priority inversion problem.\par We implemented a prototype in the Linux 2.6.19 kernel after adding real-time patches. Experimental results show that the scheduling latency is significantly reduced by up to 84.2\% when both techniques are applied together. When the Linux OS runs on an ARM-based embedded CPU running at 200MHz, the scheduling latency can become as low as 30$ \mu $s, which is much closer to the hardware-specific limitations. By lowering the scheduling latency, the limited CPU cycles can be consumed more for user-level processes and less for system-level tasks, such as interrupt handling and scheduling.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "42", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "interrupt handling; latency; Linux; Real-time operating system; responsiveness; scheduling", } @Article{Tan:2010:MSE, author = "Chiu C.
Tan and Bo Sheng and Haodong Wang and Qun Li", title = "{Microsearch}: a search engine for embedded devices used in pervasive computing", journal = j-TECS, volume = "9", number = "4", pages = "43:1--43:??", month = mar, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1721695.1721709", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Apr 2 17:12:34 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this article, we present Microsearch, a search system suitable for embedded devices used in ubiquitous computing environments. Akin to a desktop search engine, Microsearch indexes the information inside a small device, and accurately resolves a user's queries. Given the limited hardware, conventional search engine design and algorithms cannot be used. We adopt Information Retrieval (IR) techniques for query resolution, and proposed a new space-efficient top-$k$ query resolution algorithm. A theoretical model of Microsearch is given to better understand the trade-offs in design parameters. Evaluation is done via actual implementation on off-the-shelf hardware.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "43", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "Embedded search engine; information retrieval; pervasive computing", } @Article{Higuera-Toledano:2010:ISI, author = "M. 
Teresa Higuera-Toledano and Doug Locke and Angelo Corsaro", title = "Introduction to special issue on {Java} technologies for real-time and embedded systems", journal = j-TECS, volume = "10", number = "1", pages = "1:1--1:??", month = aug, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1814539.1814540", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Aug 30 15:29:45 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "1", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{DosSantos:2010:MPB, author = "Osmar Marchi {Dos Santos} and Andy Wellings", title = "Measuring and policing blocking times in real-time systems", journal = j-TECS, volume = "10", number = "1", pages = "2:1--2:??", month = aug, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1814539.1814541", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Aug 30 15:29:45 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In real-time systems, the execution-time overrun of a thread may lead to a deadline being missed by the thread or even other threads in the system. From a fault tolerance perspective, both execution time overruns and deadline misses can be considered timing errors that could potentially cause a failure in the system's ability to deliver its services in a timely manner. In this context, the ideal is to detect the error in the system as soon as possible, so that the propagation of the error can be limited and error recovery strategies can take place with more accurate information.
The run-time support mechanism usually deployed for monitoring the timing requirements of real-time systems is based on deadline monitoring, that is, the system calls specific application code whenever a deadline is violated. Recognizing that deadline monitoring may not be enough for providing an adequate level of fault tolerance for timing errors, major real-time programming standards, like Ada, POSIX and the Real-Time Specification for Java (RTSJ), have proposed different mechanisms for monitoring the execution time of threads. Nevertheless, in order to provide a complete fault tolerance approach for timing errors, the potential blocking time of threads also has to be monitored. In this article, we propose mechanisms for measuring and policing the blocking time of threads in the context of both {\em basic priority inheritance\/} and {\em priority ceiling protocols}. The notion of {\em blocking-time clocks and timers\/} for the POSIX standard is proposed, implemented and evaluated in the open-source real-time operating system MaRTE OS. Also, a {\em blocking time monitoring model\/} for measuring and policing blocking times in the RTSJ framework is specified. This model is implemented and evaluated in the (RTSJ-compliant) open-source middleware jRate, running on top of MaRTE OS.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "2", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "error detection; POSIX standard; Real-time specification for Java; timing errors", } @Article{Zerzelidis:2010:FFS, author = "Alexandros Zerzelidis and Andy Wellings", title = "A framework for flexible scheduling in the {RTSJ}", journal = j-TECS, volume = "10", number = "1", pages = "3:1--3:??", month = aug, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1814539.1814542", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Aug 30 15:29:45 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article presents a viable solution to introducing flexible scheduling in the Real-Time specification for Java (RTSJ), in the form of a flexible scheduling framework. The framework allows the concurrent use of multiple application-defined scheduling policies, each scheduling a subset of the total set of threads. Moreover, all threads, regardless of the policy under which they are scheduled, are permitted to share common resources. Thus, the framework can accommodate a variety of interworking applications (soft, firm, and hard) running under the RTSJ. The proposed approach is a two-level scheduling framework, where the first level is the RTSJ priority scheduler and the second level is under application control. This article describes the framework's protocol, examines the different types of scheduling policies that can be supported, and evaluates the proposed framework by measuring its execution cost. A description of an application-defined Earliest-Deadline-First (EDF) scheduler illustrates how the interface can be used. Minimum backward-compatible changes to the RTSJ specification are discussed to motivate the required interface. 
The only assumptions made about the underlying real-time operating system is that it supports preemptive priority-based dispatching of threads and that changes to priorities have immediate effect.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "3", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "flexible scheduling; RTSJ; Scheduling framework", } @Article{Spring:2010:RAI, author = "Jesper Honig Spring and Filip Pizlo and Jean Privat and Rachid Guerraoui and Jan Vitek", title = "{Reflexes}: {Abstractions} for integrating highly responsive tasks into {Java} applications", journal = j-TECS, volume = "10", number = "1", pages = "4:1--4:??", month = aug, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1814539.1814543", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Aug 30 15:29:45 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/csharp.bib; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Achieving submillisecond response times in a managed language environment such as Java or C\# requires overcoming significant challenges. In this article, we propose Reflexes, a programming model and runtime system infrastructure that lets developers seamlessly mix highly responsive tasks and timing-oblivious Java applications. Thus enabling gradual addition of real-time features, to a non-real-time application without having to resort to recoding the real-time parts in a different language such as C or Ada. 
Experiments with the Reflex prototype implementation show that it is possible to run a real-time task with a period of 45$ \mu $s with an accuracy of 99.996\% (only 0.001\% worse than the corresponding C implementation) in the presence of garbage collection and heavy load ordinary Java threads.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "4", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "Java virtual machine; memory management; Real-time systems", } @Article{Kim:2010:EAE, author = "Minseong Kim and Andy Wellings", title = "Efficient asynchronous event handling in the real-time specification for {Java}", journal = j-TECS, volume = "10", number = "1", pages = "5:1--5:??", month = aug, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1814539.1814544", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Aug 30 15:29:45 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The Real-Time Specification for Java (RTSJ) is becoming mature. It has been implemented, formed the basis for research and used in serious applications. Some strengths and weaknesses are emerging. One of the areas that requires further elaboration is asynchronous event handling (AEH). The primary goal for handlers in the RTSJ is to have a lightweight concurrency mechanism. Some implementation will, however, simply map a handler to a real-time thread and this results in undermining the original motivations and introduces performance penalties. However it is generally unclear how to map handlers to real-time threads effectively. Also the support for nonblocking handlers in the RTSJ is criticized as lacking in configurability as implementations are unable to take advantage of them. 
This article, therefore, examines the AEH techniques used in some popular RTSJ implementations and proposes two efficient AEH models for the RTSJ. We then define formal models of the RTSJ AEH implementations using the automata formalism provided by the UPPAAL model checking tool. Using the automata models, their properties are explored and verified. In the proposed models, blocking and nonblocking handlers are serviced by different algorithms. In this way, it is possible to assign a real-time thread to a handler at the right time in the right place while maintaining the fewest possible threads overall and to give a certain level of configurability to AEH. We also have implemented the proposed models on an existing RTSJ implementation, jRate and executed a set of performance tests that measure their respective dispatch and multiple-handler completion latencies. The results from the tests and the verifications indicate that the proposed models require fewer threads on average with better performance than other approaches.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "5", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "asynchronous event handling; blocking handler; multiple-server switching phenomenon; nonblocking handler; RTSJ", } @Article{Schoeberl:2010:NRT, author = "Martin Schoeberl and Wolfgang Puffitsch", title = "Nonblocking real-time garbage collection", journal = j-TECS, volume = "10", number = "1", pages = "6:1--6:??", month = aug, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1814539.1814545", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Aug 30 15:29:45 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "A real-time garbage collector has to fulfill two basic properties: ensure that programs with bounded allocation rates do not run out of memory and provide short blocking times. Even for incremental garbage collectors, two major sources of blocking exist, namely, root scanning and heap compaction. Finding root nodes of an object graph is an integral part of tracing garbage collectors and cannot be circumvented. Heap compaction is necessary to avoid probably unbounded heap fragmentation, which in turn would lead to unacceptably high memory consumption. In this article, we propose solutions to both issues.\par Thread stacks are local to a thread, and root scanning, therefore, only needs to be atomic with respect to the thread whose stack is scanned. This fact can be utilized by either blocking only the thread whose stack is scanned, or by delegating the responsibility for root scanning to the application threads. The latter solution eliminates blocking due to root scanning completely. The impact of this solution on the execution time of a garbage collector is shown for two different variants of such a root scanning algorithm.\par During heap compaction, objects are copied. 
Copying is usually performed atomically to avoid interference with application threads, which could render the state of an object inconsistent. Copying of large objects and especially large arrays introduces long blocking times that are unacceptable for real-time systems. In this article, an interruptible copy unit is presented that implements nonblocking object copy. The unit can be interrupted after a single word move.\par We evaluate a real-time garbage collector that uses the proposed techniques on a Java processor. With this garbage collector, it is possible to run high-priority hard real-time tasks at 10 kHz parallel to the garbage collection task on a 100 MHz system.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "6", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "Garbage collection; nonblocking copying; real-time; root scanning", } @Article{Basanta-Val:2010:NHR, author = "Pablo Basanta-Val and Marisol Garc{\'\i}a-Valls and Iria Est{\'e}vez-Ayres", title = "{No-Heap Remote Objects} for distributed real-time {Java}", journal = j-TECS, volume = "10", number = "1", pages = "7:1--7:??", month = aug, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1814539.1814546", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Aug 30 15:29:45 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article presents an approach to providing real-time support for Java's Remote Method Invocation (RMI) and its integration with the RTSJ memory model in order to leave out garbage collection. A new construct for remote objects, called {\em No-heap Remote object\/} ({\em NhRo\/}), is introduced. The use of a NhRo guarantees that memory required to perform a remote invocation (at the server side) does not use heap memory. 
Thus, the aim is to avoid garbage collection in the remote invocation process, improving predictability and memory isolation of distributed Java-based real-time applications. The article presents the bare model and the main programming patterns that are associated with the NhRo model. Sun RMI implementation has been modified to integrate the NhRo model in both static and dynamic environments.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "7", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "distributed real-time Java; DRTSJ; Real-time Java; real-time remote objects; region-based memory management; RTSJ", } @Article{Curley:2010:RDT, author = "Edward Curley and Binoy Ravindran and Jonathan Anderson and E. Douglas Jensen", title = "Recovering from distributable thread failures in distributed real-time {Java}", journal = j-TECS, volume = "10", number = "1", pages = "8:1--8:??", month = aug, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1814539.1814547", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Aug 30 15:29:45 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We consider the problem of recovering from the failures of distributable threads (``threads'') in distributed real-time systems that operate under runtime uncertainties including those on thread execution times, thread arrivals, and node failure occurrences. When a thread experiences a node failure, the result is a broken thread having an orphan. Under a termination model, the orphans must be detected and aborted, and exceptions must be delivered to the farthest, contiguous surviving thread segment for resuming thread execution. 
Our application/scheduling model includes the proposed distributable thread programming model for the emerging Distributed Real-Time Specification for Java (DRTSJ), together with an exception-handler model. Threads are subject to time/utility function (TUF) time constraints and an utility accrual (UA) optimality criterion. A key underpinning of the TUF/UA scheduling paradigm is the notion of ``best-effort'' where higher importance threads are always favored over lower importance ones, irrespective of thread urgency as specified by their time constraints. We present a thread scheduling algorithm called HUA and a thread integrity protocol called TPR. We show that HUA and TPR bound the orphan cleanup and recovery time with bounded loss of the best-effort property. Our implementation experience for HUA/TPR in the Reference Implementation of the proposed programming model for the DRTSJ demonstrates the algorithm/protocol's effectiveness.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "8", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "distributable thread; Distributed; distributed scheduling; Java; real-time; thread integrity", } @Article{Pitter:2010:RTJ, author = "Christof Pitter and Martin Schoeberl", title = "A real-time {Java} chip-multiprocessor", journal = j-TECS, volume = "10", number = "1", pages = "9:1--9:??", month = aug, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1814539.1814548", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Aug 30 15:29:45 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Chip-multiprocessors are an emerging trend for embedded systems. In this article, we introduce a real-time Java multiprocessor called JopCMP. 
It is a symmetric shared-memory multiprocessor, and consists of up to eight Java Optimized Processor (JOP) cores, an arbitration control device, and a shared memory. All components are interconnected via a system on chip bus. The arbiter synchronizes the access of multiple CPUs to the shared main memory. In this article, three different arbitration policies are presented, evaluated, and compared with respect to their real-time and average-case performance: a fixed priority, a fair-based, and a time-sliced arbiter.\par Tasks running on different CPUs of a chip-multiprocessor (CMP) influence each others' execution times when accessing a shared memory. Therefore, the system needs an arbiter that is able to limit the worst-case execution time of a task running on a CPU, even though tasks executing simultaneously on other CPUs access the main memory. Our research shows that timing analysis is in fact possible for homogeneous multiprocessor systems with a shared memory. The timing analysis of tasks, executing on the CMP using time-sliced memory arbitration, leads to viable worst-case execution time bounds.\par The time-sliced arbiter divides the memory access time into equal time slots, one time slot for each CPU. This memory arbitration scheme allows for a calculation of upper bounds of Java application worst-case execution times, depending on the number of CPUs, the time slot size, and the memory access time. Examples of worst-case execution time calculation are presented, and the analyzed results of a real-world application task are compared to measured execution time results. Finally, we evaluate the tradeoffs when using a time-predictable solution compared to using average-case optimized chip-multiprocessors, applying three different benchmarks. These experiments are carried out by executing the programs on the CMP prototype.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "9", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "Java processor; multiprocessor; Real-time system; shared memory; worst-case execution time", } @Article{Kaiser:2010:ISI, author = "William Kaiser and Majid Sarrafzadeh", title = "Introduction to special issue on wireless health", journal = j-TECS, volume = "10", number = "1", pages = "10:1--10:??", month = aug, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1814539.1814549", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Aug 30 15:29:45 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "10", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ko:2010:MME, author = "Jeonggil Ko and Jong Hyun Lim and Yin Chen and R{\u{a}}zvan Mus{\u{a}}loiu-E. and Andreas Terzis and Gerald M. Masson and Tia Gao and Walt Destler and Leo Selavo and Richard P. Dutton", title = "{MEDiSN}: {Medical} emergency detection in sensor networks", journal = j-TECS, volume = "10", number = "1", pages = "11:1--11:??", month = aug, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1814539.1814550", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Aug 30 15:29:45 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Staff shortages and an increasingly aging population are straining the ability of emergency departments to provide high quality care. At the same time, there is a growing concern about hospitals' ability to provide effective care during disaster events. For these reasons, tools that automate patient monitoring have the potential to greatly improve efficiency and quality of health care. 
Towards this goal, we have developed {\em MEDiSN}, a wireless sensor network for monitoring patients' physiological data in hospitals and during disaster events. MEDiSN comprises {\em Physiological Monitors\/} (PMs), which are custom-built, patient-worn motes that sample, encrypt, and sign physiological data and {\em Relay Points\/} (RPs) that self-organize into a multi-hop wireless backbone for carrying physiological data. Moreover, MEDiSN includes a back-end server that persistently stores medical data and presents them to authenticated GUI clients. The combination of MEDiSN's two-tier architecture and optimized rate control protocols allows it to address the compound challenge of reliably delivering large volumes of data while meeting the application's QoS requirements. Results from extensive simulations, testbed experiments, and multiple pilot hospital deployments show that MEDiSN can scale from tens to at least five hundred PMs, effectively protect application packets from congestive and corruptive losses, and deliver medically actionable data.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "11", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "Medical sensor networks; wireless physiological monitoring", } @Article{Coronato:2010:FSW, author = "Antonio Coronato and Giuseppe {De Pietro}", title = "Formal specification of wireless and pervasive healthcare applications", journal = j-TECS, volume = "10", number = "1", pages = "12:1--12:??", month = aug, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1814539.1814551", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Aug 30 15:29:45 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Wireless and pervasive healthcare applications typically present critical requirements from the point of view of functional correctness, reliability, availability, security, and safety. In contrast to the case of classic safety critical applications, the behavior of wireless and pervasive applications is affected by the movements and location of users and resources.\par This article presents a methodology to formally express requirements in safety critical wireless and pervasive healthcare applications in order to achieve a higher degree of dependability. In particular, it will be shown how it is possible to formalize and constrict mobility characteristics by combining, and in some cases extending, several formal methods. The article also describes a rigorous specification process. Finally, it concludes with a case study of a real safety critical pervasive healthcare application that is going to be deployed in a city hospital.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "12", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "Formal specification; methodologies and tools; wireless and pervasive healthcare applications", } @Article{Waluyo:2010:MMB, author = "Agustinus Borgy Waluyo and Wee-Soon Yeoh and Isaac Pek and Yihan Yong and Xiang Chen", title = "{MobiSense}: {Mobile} body sensor network for ambulatory monitoring", journal = j-TECS, volume = "10", number = "1", pages = "13:1--13:??", month = aug, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1814539.1814552", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Aug 30 15:29:45 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article introduces MobiSense, a novel mobile health monitoring system for ambulatory patients. MobiSense resides in a mobile device, communicates with a set of body sensor devices attached to the wearer, and processes data from these sensors. MobiSense is able to detect body postures such as lying, sitting, and standing, and walking speed, by utilizing our rule-based heuristic activity classification scheme based on the extended Kalman (EK) Filtering algorithm. Furthermore, the proposed system is capable of controlling each of the sensor devices, and performing resource reconfiguration and management schemes (sensor sleep/wake-up mode). The architecture of MobiSense is highlighted and discussed in depth. The system has been implemented, and its prototype is showcased. We have also carried out rigorous performance measurements of the system including real-time and query latency as well as the power consumption of the sensor nodes. The accuracy of our activity classifier scheme has been evaluated by involving several human subjects, and we found promising results.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "13", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "ambulatory patient monitoring; pervasive healthcare; wireless body sensor network; Wireless health system", } @Article{Quwaider:2010:TPA, author = "Muhannad Quwaider and Jayanthi Rao and Subir Biswas", title = "Transmission power assignment with postural position inference for on-body wireless communication links", journal = j-TECS, volume = "10", number = "1", pages = "14:1--14:??", month = aug, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1814539.1814553", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Aug 30 15:29:45 MDT 2010", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article presents a novel transmission power assignment mechanism for on-body wireless links formed between severely energy-constrained wearable and implanted sensors. The key idea is to develop a measurement-based framework in which the postural position as it pertains to a given wireless link is first inferred based on the measured RF signal strength and packet drops. Then optimal power assignment is done by fitting those measurement results into a model describing the relationship between the assigned power and the resulting signal strength. A closed loop power control mechanism is then added for iterative convergence to the optimal power level as a response to both intra-and-inter posture body movements. This provides a practical paradigm for on-body power assignment, which cannot leverage the existing mechanisms in the literature that rely on localization, which is not realistic for on-body sensors. Extensive experimental results are provided to demonstrate the model building and algorithm performance on a prototype body area network. 
The proposed mechanism has also been compared with a number of other closed loop mechanisms and an experimental benchmark.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "14", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "adaptive power control; Body area network; link quality measurement; radio link quality", } @Article{Basten:2010:EMD, author = "Twan Basten and Rolf Ernst", title = "Editorial: {Model-driven} embedded-system design", journal = j-TECS, volume = "10", number = "2", pages = "15:1--15:??", month = dec, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1880050.1880051", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jan 10 09:44:12 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "15", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Geilen:2010:SDS, author = "Marc Geilen", title = "Synchronous dataflow scenarios", journal = j-TECS, volume = "10", number = "2", pages = "16:1--16:??", month = dec, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1880050.1880052", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jan 10 09:44:12 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The Synchronous Dataflow (SDF) model of computation by Lee and Messerschmitt has become popular for modeling concurrent applications on a multiprocessor platform. It is used to obtain a guaranteed, predictable performance. The model, on the other hand, is quite restrictive in its expressivity, making it less applicable to many modern, more dynamic applications. 
A common technique to deal with dynamic behavior is to consider different scenarios in separation. This analysis is, however, currently limited mainly to sequential applications. In this article, we present a new analysis approach that allows analysis of synchronous dataflow models across different scenarios of operation. The dataflow graphs corresponding to the different scenarios can be completely different.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "16", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wiggers:2010:BCC, author = "Maarten H. Wiggers and Marco J. G. Bekooij and Gerard J. M. Smit", title = "Buffer capacity computation for throughput-constrained modal task graphs", journal = j-TECS, volume = "10", number = "2", pages = "17:1--17:??", month = dec, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1880050.1880053", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jan 10 09:44:12 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Increasingly, stream-processing applications include complex control structures to better adapt to changing conditions in their environment. This adaptivity often results in task execution rates that are dependent on the processed stream. Current approaches to compute buffer capacities that are sufficient to satisfy a throughput constraint have limited applicability in case of data-dependent task execution rates. In this article, we present a dataflow model that allows tasks to have loops with an unbounded number of iterations. For instances of this dataflow model, we present efficient checks on their validity. Furthermore, we present an efficient algorithm to compute buffer capacities that are sufficient to satisfy a throughput constraint.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "17", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Falk:2010:ASA, author = "Joachim Falk and Christian Zebelein and Joachim Keinert and Christian Haubelt and Juergen Teich and Shuvra S. Bhattacharyya", title = "Analysis of {SystemC} actor networks for efficient synthesis", journal = j-TECS, volume = "10", number = "2", pages = "18:1--18:??", month = dec, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1880050.1880054", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jan 10 09:44:12 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Applications in the signal processing domain are often modeled by dataflow graphs. Due to heterogeneous complexity requirements, these graphs contain both dynamic and static dataflow actors. In previous work, we presented a generalized clustering approach for these heterogeneous dataflow graphs in the presence of unbounded buffers. This clustering approach allows the application of static scheduling methodologies for static parts of an application during embedded software generation for multiprocessor systems. It systematically exploits the predictability and efficiency of the static dataflow model to obtain latency and throughput improvements. In this article, we present a generalization of this clustering technique to dataflow graphs with bounded buffers, therefore enabling synthesis for embedded systems without dynamic memory allocation.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "18", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Razavi:2010:SAB, author = "Niloofar Razavi and Razieh Behjati and Hamideh Sabouri and Ehsan Khamespanah and Amin Shali and Marjan Sirjani", title = "{Sysfier}: {Actor-based} formal verification of {SystemC}", journal = j-TECS, volume = "10", number = "2", pages = "19:1--19:??", month = dec, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1880050.1880055", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jan 10 09:44:12 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "SystemC is a system-level modeling language that can be used effectively for hardware/software co-design. Since a major goal of SystemC is to enable verification at higher levels of abstraction, the tendency is now directing to introducing formal verification approaches for SystemC. In this article, we propose an approach for formal verification of SystemC designs, and provide the semantics of SystemC using Labeled Transition Systems (LTS) for this purpose. An actor-based language, Rebeca, is used as an intermediate language. SystemC designs are mapped to Rebeca models and then Rebeca verification toolset is used to verify LTL and CTL properties. To tackle the state-space explosion, Rebeca model checkers offer some reduction policies that make them appropriate for SystemC verification.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "19", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Adler:2010:CBM, author = "Rasmus Adler and Ina Schaefer and Mario Trapp and Arnd Poetzsch-Heffter", title = "Component-based modeling and verification of dynamic adaptation in safety-critical embedded systems", journal = j-TECS, volume = "10", number = "2", pages = "20:1--20:??", month = dec, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1880050.1880056", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jan 10 09:44:12 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Adaptation is increasingly used in the development of safety-critical embedded systems, in particular to reduce hardware needs and to increase availability. However, composing a system from many reconfigurable components can lead to a huge number of possible system configurations, inducing a complexity that cannot be handled during system design. To overcome this problem, we propose a new component-based modeling and verification method for adaptive embedded systems. The component-based modeling approach facilitates abstracting a composition of components to a hierarchical component. In the hierarchical component, the number of possible configurations of the composition is reduced to a small number of hierarchical configurations.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "20", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Driver:2010:MES, author = "Cormac Driver and Sean Reilly and {\'E}amonn Linehan and Vinny Cahill and Siobh{\'a}n Clarke", title = "Managing embedded systems complexity with aspect-oriented model-driven engineering", journal = j-TECS, volume = "10", number = "2", pages = "21:1--21:??", month = dec, year = "2010", CODEN = "????", DOI = "https://doi.org/10.1145/1880050.1880057", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jan 10 09:44:12 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Model-driven engineering addresses issues of platform heterogeneity and code quality through the use of high-level system models and subsequent automatic transformations. Adoption of the model-driven software engineering paradigm for embedded systems necessitates specification of appropriate models of often complex systems. Modern embedded systems are typically composed of multiple functional and nonfunctional concerns, with the nonfunctional concerns (e.g., timing and performance) typically affecting the design and implementation of the functional concerns. The presence of crosscutting concerns makes specification of adequate platform-independent models a significant challenge. Aspect-oriented software development is a separation of concerns technique that decomposes systems into distinct features with minimal overlap.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.",
  articleno =    "21",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Schliecker:2010:RTP,
  author =       "Simon Schliecker and Rolf Ernst",
  title =        "Real-time performance analysis of multiprocessor systems with shared memory",
  journal =      j-TECS,
  volume =       "10",
  number =       "2",
  pages =        "22:1--22:??",
  month =        dec,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1880050.1880058",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jan 10 09:44:12 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Predicting timing behavior is key to reliable real-time system design and verification, but becomes increasingly difficult for current multiprocessor systems on chip. The integration of formerly separate functionality into a single multicore system introduces new intercore timing dependencies resulting from the common use of the now shared resources. This feedback of system timing on local timing makes traditional performance analysis approaches inappropriate. This article presents a general methodology to model the shared resource traffic and consider its effect on the local task execution. The aggregate busy time captures the timing of multiple accesses to a shared memory far better than the traditional models that focus on the timing of individual events.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "22",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Seo:2010:DAS,
  author =       "Euiseong Seo and Sangwon Kim and Seonyeong Park and Joonwon Lee",
  title =        "Dynamic alteration schemes of real-time schedules for {I/O} device energy efficiency",
  journal =      j-TECS,
  volume =       "10",
  number =       "2",
  pages =        "23:1--23:??",
  month =        dec,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1880050.1880059",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jan 10 09:44:12 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Many I/O devices provide multiple power states known as the dynamic power management (DPM) feature. However, activating from sleep state requires significant transition time and this obstructs utilizing DPM in nonpreemptive real-time systems. This article suggests nonpreemptive real-time task scheduling schemes maximizing the effectiveness of the I/O device DPM support. First, we introduce a runtime schedulability check algorithm for nonpreemptive real-time systems that can check whether a modification from a valid schedule is still valid. By using this, we suggest three heuristic algorithms. The first algorithm reorders the execution sequence of tasks according to the similarity of their required device sets.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "23",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Cabodi:2010:BSF,
  author =       "Gianpiero Cabodi and Marco Murciano and Massimo Violante",
  title =        "Boosting software fault injection for dependability analysis of real-time embedded applications",
  journal =      j-TECS,
  volume =       "10",
  number =       "2",
  pages =        "24:1--24:??",
  month =        dec,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1880050.1880060",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jan 10 09:44:12 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The design of complex embedded systems deployed in safety-critical or mission-critical applications mandates the availability of methods to validate the system dependability across the whole design flow. In this article we introduce a fault injection approach, based on loadable kernel modules and running under the Linux operating system, which can be adopted as soon as a running prototype of the systems is available. Moreover, for the purpose of decoupling dependability analysis from hardware availability, we also propose the adoption of hardware virtualization.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "24",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Mohan:2010:PTA,
  author =       "Sibin Mohan and Frank Mueller and Michael Root and William Hawkins and Christopher Healy and David Whalley and Emilio Vivancos",
  title =        "Parametric timing analysis and its application to dynamic voltage scaling",
  journal =      j-TECS,
  volume =       "10",
  number =       "2",
  pages =        "25:1--25:??",
  month =        dec,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1880050.1880061",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jan 10 09:44:12 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Embedded systems with real-time constraints depend on a priori knowledge of worst-case execution times (WCETs) to determine if tasks meet deadlines. Static timing analysis derives bounds on WCETs but requires statically known loop bounds. This work removes the constraint on known loop bounds through parametric analysis expressing WCETs as functions. Tighter WCETs are dynamically discovered to exploit slack by dynamic voltage scaling (DVS) saving 60\% to 82\% energy over DVS-oblivious techniques and showing savings close to more costly dynamic-priority DVS algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "25",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zhu:2010:RAD,
  author =       "Dakai Zhu",
  title =        "Reliability-aware dynamic energy management in dependable embedded real-time systems",
  journal =      j-TECS,
  volume =       "10",
  number =       "2",
  pages =        "26:1--26:??",
  month =        dec,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1880050.1880062",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jan 10 09:44:12 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Recent studies show that voltage scaling, which is an efficient energy management technique, has a direct and negative effect on system reliability because of the increased rate of transient faults (e.g., those induced by cosmic particles). In this article, we propose energy management schemes that explicitly take system reliability into consideration. The proposed reliability-aware energy management schemes dynamically schedule recoveries for tasks to be scaled down to recuperate the reliability loss due to energy management. Based on the amount of available slack, the application size, and the fault rate changes, we analyze when it is profitable to reclaim the slack for energy savings without sacrificing system reliability.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "26",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ramaprasad:2010:TBF,
  author =       "Harini Ramaprasad and Frank Mueller",
  title =        "Tightening the bounds on feasible preemptions",
  journal =      j-TECS,
  volume =       "10",
  number =       "2",
  pages =        "27:1--27:??",
  month =        dec,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1880050.1880063",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jan 10 09:44:12 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Data caches are an increasingly important architectural feature in most modern computer systems. They help bridge the gap between processor speeds and memory access times. One inherent difficulty of using data caches in a real-time system is the unpredictability of memory accesses, which makes it difficult to calculate worst-case execution times (WCETs) of real-time tasks. While cache analysis for single real-time tasks has been the focus of much research in the past, bounding the preemption delay in a multitask preemptive environment is a challenging problem, particularly for data caches. This article makes multiple contributions in the context of independent, periodic tasks with deadlines less than or equal to their periods executing on a single processor.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "27",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Li:2010:SMA,
  author =       "Lian Li and Jingling Xue and Jens Knoop",
  title =        "Scratchpad memory allocation for data aggregates via interval coloring in superperfect graphs",
  journal =      j-TECS,
  volume =       "10",
  number =       "2",
  pages =        "28:1--28:??",
  month =        dec,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1880050.1880064",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jan 10 09:44:12 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Existing methods place data or code in scratchpad memory (SPM) by relying on heuristics or resorting to integer programming or mapping it to a graph-coloring problem. In this article, the SPM allocation problem for arrays is formulated as an interval coloring problem. The key observation is that in many embedded C programs, two arrays can be modeled such that either their live ranges do not interfere or one contains the other (with good accuracy). As a result, array interference graphs often form a special class of superperfect graphs (known as comparability graphs), and their optimal interval colorings become efficiently solvable. This insight has led to the development of an SPM allocation algorithm that places arrays in an interference graph in SPM by examining its maximal cliques.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "28",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Singh:2010:CPD,
  author =       "Montek Singh and Steven M.
Nowick",
  title =        "Call for papers: {Deadline: March 15, 2011}",
  journal =      j-TECS,
  volume =       "10",
  number =       "2",
  pages =        "29:1--29:??",
  month =        dec,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1880050.1880065",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jan 10 09:44:12 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "29",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{West:2011:ASS,
  author =       "Richard West and Gabriel Parmer",
  title =        "Application-specific service technologies for commodity operating systems in real-time environments",
  journal =      j-TECS,
  volume =       "10",
  number =       "3",
  pages =        "30:1--30:??",
  month =        apr,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1952522.1952523",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon May 2 10:07:27 MDT 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In order to eliminate the costs of proprietary systems and special purpose hardware, many real-time and embedded computing platforms are being built on commodity operating systems and generic hardware. Unfortunately, many such systems are ill-suited to the low-latency and predictable timing requirements of real-time applications. This article, therefore, focuses on application-specific service technologies for low-cost commodity operating systems and hardware, so that real-time service guarantees can be met. We describe contrasting methods to deploy first-class services on commodity systems that are dispatched with low latency and execute asynchronously according to bounds on CPU, memory, and I/O device usage.
Specifically, we present a ``user-level sandboxing'' (ULS) mechanism that relies on hardware protection to isolate application-specific services from the core kernel.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "30",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Liu:2011:NBF,
  author =       "Xue Liu and Tarek Abdelzaher",
  title =        "Nonutilization bounds and feasible regions for arbitrary fixed-priority policies",
  journal =      j-TECS,
  volume =       "10",
  number =       "3",
  pages =        "31:1--31:??",
  month =        apr,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1952522.1952524",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon May 2 10:07:27 MDT 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Prior research on schedulability bounds focused primarily on bounding utilization as a means to meet deadline constraints. Nontrivial bounds were found for a handful of scheduling policies in which utilization is directly related to the ability of the policy to meet deadlines. Examples include rate-monotonic, deadline-monotonic, and EDF scheduling. For most other scheduling policies, however, utilization is not correlated with schedulability. For example, shortest-job-first can miss deadlines at an arbitrarily low utilization. This raises the question of whether or not some other nonutilization-based metric might be more indicative of schedulability in those cases. This article answers the above question positively by extending the notion of schedulability bounds, in a uniform manner, to arbitrary (fixed) priorities and nonutilization metrics.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "31",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Nair:2011:EHB,
  author =       "Ajay Nair and Karthik Shankar and Roman Lysecky",
  title =        "Efficient hardware-based nonintrusive dynamic application profiling",
  journal =      j-TECS,
  volume =       "10",
  number =       "3",
  pages =        "32:1--32:??",
  month =        apr,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1952522.1952525",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon May 2 10:07:27 MDT 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Application profiling---the process of monitoring an application to determine the frequency of execution within specific regions---is an essential step within the design process for many software and hardware systems. Profiling is often a critical step within hardware/software partitioning utilized to determine the critical kernels of an application. In this article, we present an innovative, nonintrusive dynamic application profiler (DAProf) capable of profiling an executing application by monitoring the application's short backward branches, function calls, and function returns. The resulting profile information provides an accurate characterization of the frequently executed loops within the application providing a breakdown of loop executions versus loop iterations per execution.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "32",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Aaraj:2011:FDE,
  author =       "Najwa Aaraj and Anand Raghunathan and Niraj K.
Jha",
  title =        "A framework for defending embedded systems against software attacks",
  journal =      j-TECS,
  volume =       "10",
  number =       "3",
  pages =        "33:1--33:??",
  month =        apr,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1952522.1952526",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon May 2 10:07:27 MDT 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The incidence of malicious code and software vulnerability exploits on embedded platforms is constantly on the rise. Yet, little effort is being devoted to combating such threats to embedded systems. Moreover, adapting security approaches designed for general-purpose systems generally fails because of the limited processing capabilities of their embedded counterparts. In this work, we evaluate a malware and software vulnerability exploit defense framework for embedded systems. The proposed framework extends our prior work, which defines two isolated execution environments: a testing environment, wherein an untrusted application is first tested using dynamic binary instrumentation (DBI), and a real environment, wherein a program is monitored at runtime using an extracted behavioral model, along with a continuous learning process.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "33",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Berendsen:2011:FSA,
  author =       "Jasper Berendsen and Biniam Gebremichael and Frits W.
Vaandrager and Miaomiao Zhang",
  title =        "Formal specification and analysis of {Zeroconf} using {Uppaal}",
  journal =      j-TECS,
  volume =       "10",
  number =       "3",
  pages =        "34:1--34:32",
  month =        apr,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1952522.1952527",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon May 2 10:07:27 MDT 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The model checker Uppaal is used to formally model and analyze parts of Zeroconf, a protocol for dynamic configuration of IPv4 link-local addresses that has been defined in RFC 3927 of the IETF. Our goal has been to construct a model that (a) is easy to understand by engineers, (b) comes as close as possible to the informal text (for each transition in the model there should be a corresponding piece of text in the RFC), and (c) may serve as a basis for formal verification. Our modeling efforts revealed several errors (or at least ambiguities) in the RFC that no one else spotted before.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "34",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ykman-Couvreur:2011:FMM,
  author =       "Ch. Ykman-Couvreur and V. Nollet and F. Catthoor and H.
Corporaal",
  title =        "Fast multidimension multichoice knapsack heuristic for {MP-SoC} runtime management",
  journal =      j-TECS,
  volume =       "10",
  number =       "3",
  pages =        "35:1--35:??",
  month =        apr,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1952522.1952528",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon May 2 10:07:27 MDT 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Since the application complexity is growing and applications can be dynamically activated, the major challenge for heterogeneous multiprocessor platforms is to select at runtime an energy-efficient mapping of these applications. Taking into account that many different possible implementations per application can be available, and that the selection must meet the application deadlines under the available platform resources, this runtime optimization problem can be modeled as a Multidimension Multichoice Knapsack Problem (MMKP), which is known to be NP-hard. Not only algorithms for an optimal solution, but also state-of-the-art heuristics for real-time systems are still too slow for runtime management of multiprocessor platforms.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "35",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ragel:2011:HHS,
  author =       "Roshan G.
Ragel and Sri Parameswaran",
  title =        "A hybrid hardware--software technique to improve reliability in embedded processors",
  journal =      j-TECS,
  volume =       "10",
  number =       "3",
  pages =        "36:1--36:??",
  month =        apr,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1952522.1952529",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon May 2 10:07:27 MDT 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Numerous methods have been described in research literature with methods to improve reliability of processors by the use of control-flow checking. High performance and code-size penalties cripple the proposed software approaches, while hardware approaches are not scalable and are thus rarely implemented in real embedded systems. In this article, we show that by including control-flow checking as an issue to be considered when designing an embedded processor, we are able to reduce overheads considerably and still provide a scalable solution to this problem. The technique described in this article includes architectural improvements to the processor and binary rewriting of the application.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "36",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Huynh:2011:EAR,
  author =       "Johnny Huynh and Jos{\'e} Nelson Amaral and Paul Berube and Sid-Ahmed-Ali Touati",
  title =        "Evaluating address register assignment and offset assignment algorithms",
  journal =      j-TECS,
  volume =       "10",
  number =       "3",
  pages =        "37:1--37:??",
  month =        apr,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1952522.1952530",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon May 2 10:07:27 MDT 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In digital signal processors (DSPs), variables are accessed using $k$ address registers. The problem of finding a memory layout, for a set of variables, that minimizes the address-computation overhead is known as the General Offset Assignment (GOA) problem. The most common approach to this problem is to partition the set of variables into $k$ partitions and to assign each partition to an address register. Thus, effectively decomposing the GOA problem into several Simple Offset Assignment (SOA) problems. Many heuristic-based algorithms are proposed in the literature to approximate solutions to both the variable partitioning and the SOA problems. However, the address-computation overhead of the resulting memory layouts are not accurately evaluated.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "37",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Diguet:2011:CLB,
  author =       "Jean-Philippe Diguet and Yvan Eustache and Guy Gogniat",
  title =        "Closed-loop--based self-adaptive {Hardware\slash Software-Embedded} systems: Design methodology and smart {CAM} case study",
  journal =      j-TECS,
  volume =       "10",
  number =       "3",
  pages =        "38:1--38:??",
  month =        apr,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1952522.1952531",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon May 2 10:07:27 MDT 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article presents our methodology for implementing self-adaptiveness within an OS-based and reconfigurable embedded system according to objectives such as quality of service, performance, or power consumption. We detail our approach to separate application-specific decisions and hardware\slash software-implementation decisions at system level. The former are related to the efficiency control of applications and based on the knowledge of application engineers. The latter are generic and address the choice between various hardware and software implementations according to user objectives. The decision management is implemented as an adaptive closed-loop model. We describe how each design step may be implemented and especially how we solved the issue of stability.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "38",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Gamatie:2011:MDD,
  author =       "Abdoulaye Gamati{\'e} and S{\'e}bastien {Le Beux} and {\'E}ric Piel and Rabie {Ben Atitallah} and Anne Etien and Philippe Marquet and Jean-Luc Dekeyser",
  title =        "A Model-Driven Design Framework for Massively Parallel Embedded Systems",
  journal =      j-TECS,
  volume =       "10",
  number =       "4",
  pages =        "39:1--39:??",
  month =        nov,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2043662.2043663",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Dec 19 15:49:06 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Modern embedded systems integrate more and more complex functionalities. At the same time, the semiconductor technology advances enable to increase the amount of hardware resources on a chip for the execution. Massively parallel embedded systems specifically deal with the optimized usage of such hardware resources to efficiently execute their functionalities. The design of these systems mainly relies on the following challenging issues: first, how to deal with the parallelism in order to increase the performance; second, how to abstract their implementation details in order to manage their complexity; third, how to refine these abstract representations in order to produce efficient implementations.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "39",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Kim:2011:DPT,
  author =       "Seungkyun Kim and Kiwon Kwon and Chihun Kim and Choonki Jang and Jaejin Lee and Sang Lyul Min",
  title =        "Demand Paging Techniques for Flash Memory Using Compiler Post-Pass Optimizations",
  journal =      j-TECS,
  volume =       "10",
  number =       "4",
  pages =        "40:1--40:??",
  month =        nov,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2043662.2043664",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Dec 19 15:49:06 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this article, we propose an application-specific demand paging mechanism for low-end embedded systems that have flash memory as secondary storage. These systems are not equipped with virtual memory. A small memory space called an execution buffer is used to page the code of an application. An application-specific page manager manages the buffer. The page manager is automatically generated by a compiler post-pass optimizer and combined with the application image. The post-pass optimizer analyzes the executable image and transforms function call/return instructions into calls to the page manager. As a result, each function in the code can be loaded into the memory on demand at runtime.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "40",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Dini:2011:LLA,
  author =       "Gianluca Dini and Ida M.
Savino",
  title =        "{LARK}: a Lightweight Authenticated {ReKeying} Scheme for Clustered Wireless Sensor Networks",
  journal =      j-TECS,
  volume =       "10",
  number =       "4",
  pages =        "41:1--41:??",
  month =        nov,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2043662.2043665",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Dec 19 15:49:06 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Group communication has proven a powerful paradigm for designing applications and services in Wireless Sensor Networks (WSNs). Given the tight interaction between WSNs and the physical world, a security infringement may translate into a safety infringement. Therefore, in order to fully exploit the group communication paradigm we need to secure it. Traditionally, this requirement has been formalized in terms of backward and forward security and fulfilled by means of rekeying. In WSNs, group rekeying becomes particularly a complex problem because communication takes place over an easily accessible wireless medium and because sensor nodes have severe limitations in terms of computing, storage, energy, and tamper-resistance capabilities for cost reasons.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "41",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Schoeberl:2011:HAL,
  author =       "Martin Schoeberl and Stephan Korsholm and Tomas Kalibera and Anders P.
Ravn",
  title =        "A Hardware Abstraction Layer in {Java}",
  journal =      j-TECS,
  volume =       "10",
  number =       "4",
  pages =        "42:1--42:??",
  month =        nov,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2043662.2043666",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Dec 19 15:49:06 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Embedded systems use specialized hardware devices to interact with their environment, and since they have to be dependable, it is attractive to use a modern, type-safe programming language like Java to develop programs for them. Standard Java, as a platform-independent language, delegates access to devices, direct memory access, and interrupt handling to some underlying operating system or kernel, but in the embedded systems domain resources are scarce and a Java Virtual Machine (JVM) without an underlying middleware is an attractive architecture. The contribution of this article is a proposal for Java packages with hardware objects and interrupt handlers that interface to such a JVM.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "42",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Gilroy:2011:RHA,
  author =       "Michael Gilroy and James Irvine and Robert Atkinson",
  title =        "{RAID 6} Hardware Acceleration",
  journal =      j-TECS,
  volume =       "10",
  number =       "4",
  pages =        "43:1--43:??",
  month =        nov,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2043662.2043667",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Dec 19 15:49:06 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Inexpensive, reliable hard disk storage is increasingly required in both businesses and the home.
As disk capacities increase and multiple drives are combined in one system the probability of multiple disk failures increases. Through the adoption of RAID 6 the capability to recover from up to two simultaneous disk failures becomes available. In this article, we present three different RAID 6 implementations each tailored to support different target applications and optimized to reduce overall hardware resource utilization. We present an optimal Reed-Solomon-based RAID 6 implementation for arrays of four disks. We also present the smallest in terms of hardware resource utilization as well as having the highest throughput RAID 6 hardware solution for disk arrays of up to 15 drives.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "43",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zhuang:2011:CST,
  author =       "Xiaotong Zhuang and Santosh Pande",
  title =        "Compiler-Supported Thread Management for Multithreaded Network Processors",
  journal =      j-TECS,
  volume =       "10",
  number =       "4",
  pages =        "44:1--44:??",
  month =        nov,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2043662.2043668",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Dec 19 15:49:06 MST 2011",
  bibsource =    "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Traditionally, runtime management involving CPU sharing, real-time scheduling, etc., is provided by the runtime environment (typically an operating system) using hardware support such as timers and interrupts. However, due to stringent performance requirements on network processors, neither OS nor hardware mechanisms are typically feasible/available. Mapping packet processing tasks on network processors involves complex trade-offs to maximize parallelism and pipelining.
Due to an increase in the size of the code store and complexity of application requirements, network processors are being programmed with heterogeneous threads that may execute code belonging to different tasks on a given micro-engine. Also, most network applications are streaming applications that are typically processed in a pipelined fashion.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "44", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Stuart:2011:RRN, author = "Matthias Bo Stuart and Mikkel Bystrup Stensgaard and Jens Spars{\o}", title = "The {ReNoC} Reconfigurable {Network-on-Chip}: Architecture, Configuration Algorithms, and Evaluation", journal = j-TECS, volume = "10", number = "4", pages = "45:1--45:??", month = nov, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/2043662.2043669", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Dec 19 15:49:06 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article presents a reconfigurable network-on-chip architecture called ReNoC, which is intended for use in general-purpose multiprocessor system-on-chip platforms, and which enables application-specific logical NoC topologies to be configured, thus providing both efficiency and flexibility. The article presents three novel algorithms that synthesize an application-specific NoC topology, map it onto the physical ReNoC architecture, and create deadlock-free, application-specific routing algorithms. We apply our algorithms to a mixture of real and synthetic applications and target three different physical architectures. Compared to a conventional NoC, ReNoC reduces power consumption by up to 58\% on average.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "45", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Cucinotta:2011:RMA, author = "Tommaso Cucinotta and Luca Abeni and Luigi Palopoli and Giuseppe Lipari", title = "A Robust Mechanism for Adaptive Scheduling of Multimedia Applications", journal = j-TECS, volume = "10", number = "4", pages = "46:1--46:??", month = nov, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/2043662.2043670", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Dec 19 15:49:06 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We propose an adaptive scheduling technique to schedule highly dynamic multimedia tasks on a CPU. We use a combination of two techniques: the first one is a feedback mechanism to track the resource requirements of the tasks based on ``local'' observations. The second one is a mechanism that operates with a ``global'' visibility, reclaiming unused bandwidth. The combination proves very effective: resource reclaiming increases the robustness of the feedback, while the identification of the correct bandwidth made by the feedback increases the effectiveness of the reclamation. We offer both theoretical results and an extensive experimental validation of the approach.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "46", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Touati:2011:ESR, author = "Sid-Ahmed-Ali Touati and Frederic Brault and Karine Deschinkel and Beno{\^\i}t Dupont de Dinechin", title = "Efficient Spilling Reduction for Software Pipelined Loops in Presence of Multiple Register Types in Embedded {VLIW} Processors", journal = j-TECS, volume = "10", number = "4", pages = "47:1--47:??", month = nov, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/2043662.2043671", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Dec 19 15:49:06 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Integrating register allocation and software pipelining of loops is an active research area. We focus on techniques that precondition the dependence graph before software pipelining in order to ensure that no register spill instructions are inserted by the register allocator in the software pipelined loop. If spilling is not necessary for the input code, preconditioning techniques insert dependence arcs so that the maximum register pressure MAXLIVE achieved by any loop schedule is below the number of available registers, without hurting the initiation interval if possible. When a solution exists, a spill-free software pipeline is guaranteed to exist. Existing preconditioning techniques consider one register type (register class) at a time [Deschinkel and Touati 2008].", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "47", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhou:2011:ARA, author = "Gang Zhou and Qiang Li and Jingyuan Li and Yafeng Wu and Shan Lin and Jian Lu and Chieh-Yih Wan and Mark D. Yarvis and John A. 
Stankovic", title = "Adaptive and Radio-Agnostic {QoS} for Body Sensor Networks", journal = j-TECS, volume = "10", number = "4", pages = "48:1--48:??", month = nov, year = "2011", CODEN = "????", DOI = "https://doi.org/10.1145/2043662.2043672", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Dec 19 15:49:06 MST 2011", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "As wireless devices and sensors are increasingly deployed on people, researchers have begun to focus on wireless body-area networks. Applications of wireless body sensor networks include healthcare, entertainment, and personal assistance, in which sensors collect physiological and activity data from people and their environments. In these body sensor networks, quality of service is needed to provide reliable data communication over prioritized data streams. This article proposes BodyQoS, the first running QoS system demonstrated on an emulated body sensor network. BodyQoS adopts an asymmetric architecture, in which most processing is done on a resource-rich aggregator, minimizing the load on resource-limited sensor nodes.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "48", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wandeler:2012:UGS, author = "Ernesto Wandeler and Alexander Maxiaguine and Lothar Thiele", title = "On the use of greedy shapers in real-time embedded systems", journal = j-TECS, volume = "11", number = "1", pages = "1:1--1:??", month = mar, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2146417.2146418", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Apr 2 17:42:24 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Traffic shaping is a well-known technique in the area of networking and is proven to reduce global buffer requirements and end-to-end delays in networked systems. Due to these properties, shapers also play an increasingly important role in the design of multiprocessor embedded systems that exhibit a considerable amount of on-chip traffic. Despite the growing importance of traffic shaping in this area, no methods exist for analyzing shapers in distributed embedded systems and for incorporating them into a system-level performance analysis. Until now it was not possible to determine the effect of shapers on end-to-end delay guarantees or buffer requirements in such systems. In this work, we present a method for analyzing greedy shapers, and we embed this analysis method into a well-established modular performance analysis framework for real-time embedded systems. The presented approach enables system-level performance analysis of complete systems with greedy shapers, and we prove its applicability by analyzing three case study systems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "1", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hamers:2012:EMS, author = "Juan Hamers and Lieven Eeckhout", title = "Exploiting media stream similarity for energy-efficient decoding and resource prediction", journal = j-TECS, volume = "11", number = "1", pages = "2:1--2:??", month = mar, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2146417.2146419", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Apr 2 17:42:24 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article introduces a novel approach to energy-efficient media stream decoding that is based on the notion of media stream similarity. The key idea is that platform-independent scenarios with similar decoding complexity can be identified within and across media streams. A device that decodes a media stream annotated with scenario information can then adjust its processor clock frequency and voltage level based on these scenarios for lower energy consumption. Our evaluation, done using the H.264 AVC decoder and 12 reference video streams, shows an average energy reduction of 44\% while missing less than 0.2\% of the frame deadlines using scenario-driven video decoding. An additional application of scenario-based media stream annotation is to predict required resources (compute power and energy) for consuming a given service on a given device. Resource prediction is extremely useful in a client-server setup in which the client requests a media service from the server or content provider. The content provider (in cooperation with the client) can then determine what service quality to deliver, given the client's available resources. 
Scenario-aware resource prediction can predict (compute power and energy) consumption with errors less than 4\% (and an overall average 1.4\% error).", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "2", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhong:2012:WSN, author = "Ziguo Zhong and Tian He", title = "Wireless sensor node localization by multisequence processing", journal = j-TECS, volume = "11", number = "1", pages = "3:1--3:??", month = mar, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2146417.2146420", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Apr 2 17:42:24 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Wireless Sensor Networks have been proposed for use in many location-dependent applications. Most of these need to identify the locations of sensor nodes, a challenging task because of severe constraints on cost, energy and effective range of sensor devices. To overcome limitations in existing solutions, we present a Multi-Sequence Positioning (MSP) method for large-scale stationary sensor node localization in outdoor environments. The novel idea behind MSP is to reconstruct and estimate two-dimensional location information for each sensor node by processing multiple one-dimensional node sequences, easily obtained through loosely guided event distribution. Starting from a basic MSP design, we propose four optimizations that work together to increase localization accuracy. We address several interesting issues such as incomplete (partial) node sequences and sequence flip, found in the Mirage test-bed we built. We have evaluated the MSP system through theoretical analysis, extensive simulation as well as two physical systems (an indoor version with 46 MICAz motes and an outdoor version with 20 MICAz motes). 
Evaluation demonstrates that MSP can achieve an accuracy within one foot, requiring neither additional costly hardware on sensor nodes nor precise event distribution. In fact, it provides a nice tradeoff between physical cost (anchors) and soft cost (events) while maintaining localization accuracy.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "3", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Peng:2012:BHA, author = "Chunyi Peng and Guobin Shen and Yongguang Zhang", title = "{BeepBeep}: a high-accuracy acoustic-based system for ranging and localization using {COTS} devices", journal = j-TECS, volume = "11", number = "1", pages = "4:1--4:??", month = mar, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2146417.2146421", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Apr 2 17:42:24 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We present the design and implementation of BeepBeep, a high-accuracy acoustic-based system for ranging and localization. It is a pure software-based solution and uses the most basic set of commodity hardware --- a speaker, a microphone, and some form of interdevice communication. The ranging scheme works without any infrastructure and is applicable to sensor platforms and commercial-off-the-shelf mobile devices. It achieves high accuracy through three techniques: two-way sensing, self-recording, and sample counting. We further devise a scalable and fast localization scheme. Our experiments show that up to one-centimeter ranging accuracy and three-centimeter localization accuracy can be achieved.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "4", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kumar:2012:CMA, author = "T. S. Rajesh Kumar and R. Govindarajan and C. P. Ravikumar", title = "On-chip memory architecture exploration framework for {DSP} processor-based embedded system on chip", journal = j-TECS, volume = "11", number = "1", pages = "5:1--5:??", month = mar, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2146417.2146422", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Apr 2 17:42:24 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Today's SoCs are complex designs with multiple embedded processors, memory subsystems, and application specific peripherals. The memory architecture of embedded SoCs strongly influences the power and performance of the entire system. Further, the memory subsystem constitutes a major part (typically up to 70\%) of the silicon area for the current day SoC. In this article, we address the on-chip memory architecture exploration for DSP processors which are organized as multiple memory banks, where banks can be single/dual ported with non-uniform bank sizes. In this paper we propose two different methods for physical memory architecture exploration and identify the strengths and applicability of these methods in a systematic way. Both methods address the memory architecture exploration for a given target application by considering the application's data access characteristics and generates a set of Pareto-optimal design points that are interesting from a power, performance and VLSI area perspective. 
To the best of our knowledge, this is the first comprehensive work on memory space exploration at physical memory level that integrates data layout and memory exploration to address the system objectives from both hardware design and application software development perspective. Further we propose an automatic framework that explores the design space identifying 100's of Pareto-optimal design points within a few hours of running on a standard desktop configuration.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "5", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Pande:2012:PDP, author = "Amit Pande and Joseph Zambreno", title = "{Poly-DWT}: {Polymorphic} wavelet hardware support for dynamic image compression", journal = j-TECS, volume = "11", number = "1", pages = "6:1--6:??", month = mar, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2146417.2146423", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Apr 2 17:42:24 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Many modern computing applications have been enabled through the use of real-time multimedia processing. While several hardware architectures have been proposed in the research literature to support such primitives, these fail to address applications whose performance and resource requirements have a dynamic aspect. Embedded multimedia systems typically need a power and computation efficient design in addition to good compression performance. In this article, we introduce a Polymorphic Wavelet Architecture (Poly-DWT) as a crucial building block towards the development of embedded systems to address such challenges. 
We illustrate how our Poly-DWT architecture can potentially make dynamic resource allocation decisions, such as the internal bit representation and the processing kernel, according to the application requirements. We introduce a filter switching architecture that allows for dynamic switching between 5/3 and 9/7 wavelet filters and leads to a more power efficient design. Further, a multiplier-free design with a low adder requirement demonstrates the potential of Poly-DWT for embedded systems. Through an FPGA prototype, we perform a quantitative analysis of our Poly-DWT architecture, and compare our filter to existing approaches to illustrate the area and performance benefits inherent in our approach.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "6", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Seo:2012:RGV, author = "Suk-Hyun Seo and Jin-Ho Kim and Sung-Ho Hwang and Key Ho Kwon and Jae Wook Jeon", title = "A reliable gateway for in-vehicle networks based on {LIN}, {CAN}, and {FlexRay}", journal = j-TECS, volume = "11", number = "1", pages = "7:1--7:??", month = mar, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2146417.2146424", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Apr 2 17:42:24 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article describes a reliable gateway for in-vehicle networks. Such networks include local interconnect networks, controller area networks, and FlexRay. There is some latency when transferring a message from one node (source) to another node (destination). A high probability of error exists due to different protocol specifications such as baud-rate, and message frame format. Therefore, deploying a reliable gateway is a challenge to the automotive industry. 
We propose a reliable gateway based on the OSEK/VDX components for in-vehicle networks. We also examine the gateway system developed, and then we evaluate the performance of our proposed system.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "7", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Huang:2012:EFP, author = "Kai Huang and Wolfgang Haid and Iuliana Bacivarov and Matthias Keller and Lothar Thiele", title = "Embedding formal performance analysis into the design cycle of {MPSoCs} for real-time streaming applications", journal = j-TECS, volume = "11", number = "1", pages = "8:1--8:??", month = mar, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2146417.2146425", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Apr 2 17:42:24 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Modern real-time streaming applications are increasingly implemented on multiprocessor systems-on-chip (MPSoC). The implementation, as well as the verification of real-time applications executing on MPSoCs, are difficult tasks, however. A major challenge is the performance analysis of MPSoCs, which is required for early design space exploration and final system verification. Simulation-based methods are not well-suited for this purpose, due to long runtimes and non-exhaustive corner-case coverage. To overcome these limitations, formal performance analysis methods that provide guarantees for meeting real-time constraints have been developed. Embedding formal performance analysis into the MPSoC design cycle requires the generation of a faithful analysis model and its calibration with the system-specific parameters. In this article, a design flow that automates these steps is presented. 
In particular, we integrate modular performance analysis (MPA) into the distributed operation layer (DOL) MPSoC programming environment. The result is an MPSoC software design flow that allows for automatically generating the system implementation, together with an analysis model for system verification.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "8", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chang:2012:AFS, author = "Yuan-Hao Chang and Po-Liang Wu and Tei-Wei Kuo and Shih-Hao Hung", title = "An adaptive file-system-oriented {FTL} mechanism for flash-memory storage systems", journal = j-TECS, volume = "11", number = "1", pages = "9:1--9:??", month = mar, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2146417.2146426", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Apr 2 17:42:24 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "As flash memory becomes popular over various platforms, there is a strong demand regarding the performance degradation problem, due to the special characteristics of flash memory. This research proposes the design of a file-system-oriented flash translation layer, in which a filter mechanism is designed to separate the access requests of file-system metadata and file contents for better performance. A recovery scheme is then proposed for maintaining the integrity of a file system. The proposed flash translation layer is implemented as a Linux device driver and evaluated with respect to ext2 and ext3 file systems. Experiments were also done over NTFS by a series of realistic traces. The experimental results show significant performance improvement over ext2, ext3, and NTFS file systems with limited system overheads.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "9", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Li:2012:SRS, author = "Chunxiao Li and Niraj K. Jha and Anand Raghunathan", title = "Secure reconfiguration of software-defined radio", journal = j-TECS, volume = "11", number = "1", pages = "10:1--10:??", month = mar, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2146417.2146427", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Apr 2 17:42:24 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Software-defined radio (SDR) implements a radio system in software that executes on a programmable processor. The components of SDR, such as the filters, amplifiers, and modulators, can be easily reconfigured to adapt to the operating environment and user preferences. However, the flexibility of radio reconfiguration brings along the serious security concern of malicious modification of software in the SDR system, leading to radio malfunction and interference with other users' communications. Both the SDR device and the network need to be protected from such malicious radio reconfiguration. In this article, a new architecture targeted at protecting SDR devices from malicious reconfiguration is proposed. The architecture is based on robust separation of the radio operation environment and user application environment, through the use of virtualization. A new radio middleware layer is designed to securely intercept all attempts to reconfigure the radio, and a security policy monitor checks the target configuration against security policies that represent the interests of various parties. Even if the operating system in the user application environment is compromised, the proposed architecture can ensure secure reconfiguration in the radio operation environment. 
We have prototyped the proposed secure SDR architecture using VMware and the GNU Radio toolkit and demonstrate that overheads incurred by the architecture are small and tolerable. Therefore, we believe that the proposed solution could be applied to address secure SDR reconfiguration in both general-purpose and embedded computing systems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "10", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Berekovic:2012:ISS, author = "Mladen Berekovic and Samarjit Chakraborty and Petru Eles and Andy D. Pimentel", title = "Introduction to the {Special Section on ESTIMedia'08}", journal = j-TECS, volume = "11S", number = "1", pages = "11:1--11:??", year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2180887.2180891", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 7 16:18:52 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "11", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhu:2012:PAR, author = "Jun Zhu and Ingo Sander and Axel Jantsch", title = "Performance Analysis of Reconfigurations in Adaptive Real-Time Streaming Applications", journal = j-TECS, volume = "11S", number = "1", pages = "12:1--12:??", year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2180887.2180888", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 7 16:18:52 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We propose a performance analysis framework for adaptive real-time synchronous data flow streaming applications on runtime reconfigurable FPGAs. 
As the main contribution, we present a constraint based approach to capture both streaming application execution semantics and the varying design concerns during reconfigurations. With our event models constructed as cumulative functions on data streams, we exploit a novel compile-time analysis framework based on iterative timing phases. Finally, we implement our framework on a public domain constraint solver, and illustrate its capabilities in the analysis of design trade-offs due to reconfigurations with experiments.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "12", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hsieh:2012:PBP, author = "Kun-Yuan Hsieh and Chi-Hua Lai and Shang-Hong Lai and Jenq Kuen Lee", title = "Parallelization of Belief Propagation on {Cell} Processors for Stereo Vision", journal = j-TECS, volume = "11S", number = "1", pages = "13:1--13:??", year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2180887.2180889", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 7 16:18:52 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Markov random field models provide a robust formulation for the stereo vision problem of inferring three-dimensional scene geometry from two images taken from different viewpoints. One of the most advanced algorithms for solving the associated energy minimization problem in the formulation is belief propagation (BP). Although BP provides very accurate results in solving stereo vision problems, the high computational cost of the algorithm hinders it from real-time applications. In recent years, multicore architectures have been widely adopted in various industrial application domains. The high computing power of multicore processors provides new opportunities to implement stereo vision algorithms. 
This article examines and extracts the parallelisms in the BP method for stereo vision on multicore processors. This article shows that parallelism of the algorithm can be efficiently utilized on multicore processors. The results show that parallelization on multicore processors provides a speedup for the BP algorithm of almost 15 times compared to the single-processor implementation on the PPE of the Cell BE. The experimental results also indicate that a frame rate of 6.5 frames/second is possible when implementing the parallelized BP algorithm on the multicore processor of Cell BE with one PPE and six SPEs.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "13", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Terechko:2012:BPS, author = "Andrei Terechko and Jan Hoogerbrugge and Ghiath Alkadi and Surendra Guntur and Anirban Lahiri and Marc Duranton and Clemens W{\"u}st and Phillip Christie and Axel Nackaerts and Aatish Kumar", title = "Balancing Programmability and Silicon Efficiency of Heterogeneous Multicore Architectures", journal = j-TECS, volume = "11S", number = "1", pages = "14:1--14:??", year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2180887.2180890", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 7 16:18:52 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Multicore architectures provide scalable performance with a lower hardware design effort than single core processors. Our article presents a design methodology and an embedded multicore architecture, focusing on reducing the software design complexity and boosting the performance density. First, we analyze characteristics of the Task-Level Parallelism in modern multimedia workloads. These characteristics are used to formulate requirements for the programming model. 
Then we translate the programming model requirements to an architecture specification, including a novel low-complexity implementation of cache coherence and a hardware synchronization unit. Our evaluation demonstrates that the novel coherence mechanism substantially simplifies hardware design, while reducing the performance by less than 18\% relative to a complex snooping technique. Compared to a single processor core, the multicores have already proven to be more area- and energy-efficient. However, the multicore architectures in embedded systems still compete with highly efficient function-specific hardware accelerators. In this article we identify five architectural methods to boost performance density of multicores; microarchitectural downscaling, asymmetric multicore architectures, multithreading, generic accelerators, and conjoining. Then, we present a novel methodology to explore multicore design spaces, including the architectural methods improving the performance density. The methodology is based on a complex formula computing performances of heterogeneous multicore systems. Using this design space exploration methodology for HD and QuadHD H.264 video decoding, we estimate that the required areas of multicores in CMOS 45 nm are 2.5 mm$^2$ and 8.6 mm$^2$, respectively. These results suggest that heterogeneous multicores are cost-effective for embedded applications and can provide a good programmability support.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "14", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Khajeh:2012:EAA, author = "Amin Khajeh and Minyoung Kim and Nikil Dutt and Ahmed M. Eltawil and Fadi J. 
Kurdahi", title = "Error-Aware Algorithm\slash Architecture Coexploration for Video Over Wireless Applications", journal = j-TECS, volume = "11S", number = "1", pages = "15:1--15:??", year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2180887.2180892", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 7 16:18:52 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this article, we propose a cross-layer algorithm/architecture coexploration for wireless multimedia systems to coordinate interactions among sublayer optimizers for improvements in energy/QoS/reliability. By exploiting the inherent redundancy in wireless multimedia systems, we generate an expanded design space over traditional layer-specific approaches. Specifically, we control the error resilient encoder at the application layer to provide awareness of architectural exploration at the physical layer allowing new design points with lower power consumption via aggressive voltage scaling. While trying to reduce energy consumption, the fault tolerant technique compensates the effect of the hardware and network errors due to aggressive voltage scaling and lossy transmission, respectively. Our experiments on H.263 video over a WCDMA communication system demonstrate that coexploration enlarges the feasible design space, which results in significant power savings of more than 20\% in the WCDMA modem.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "15", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Salamy:2012:SOT, author = "Hassan Salamy and J. 
Ramanujam", title = "Storage Optimization through Offset Assignment with Variable Coalescing", journal = j-TECS, volume = "11S", number = "1", pages = "16:1--16:??", year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2180887.2180893", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 7 16:18:52 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Most modern digital signal processors (DSPs) provide multiple address registers and a dedicated address generation unit (AGU) which performs address generation in parallel to instruction execution. There is no address computation overhead if the next address is within the auto-modify range. A careful placement of variables in memory is utilized to decrease the number of address arithmetic instructions and thus to generate compact and efficient code. The simple offset assignment (SOA) problem concerns the layout of variables for machines with one address register and the general offset assignment (GOA) deals with multiple address registers. Both these problems assume that each variable needs to be allocated for the entire duration of a program. Both SOA and GOA are NP-complete. In this article, we present effective heuristics for the simple and the general offset assignment problems with variable coalescing where two or more non-interfering variables can be mapped into the same memory location. Results on several benchmarks show the significant improvement of our proposed heuristics compared to other heuristics in the literature.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "16", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Falk:2012:ISS, author = "Heiko Falk and Peter Marwedel", title = "Introduction to the {Special Section on SCOPES'09}", journal = j-TECS, volume = "11S", number = "1", pages = "17:1--17:??", year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2180887.2180894", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 7 16:18:52 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "17", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kim:2012:FLF, author = "Jaegeuk Kim and Hyotaek Shim and Seon-Yeong Park and Seungryoul Maeng and Jin-Soo Kim", title = "{FlashLight}: a Lightweight Flash File System for Embedded Systems", journal = j-TECS, volume = "11S", number = "1", pages = "18:1--18:??", year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2180887.2180895", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 7 16:18:52 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "A very promising approach for using NAND flash memory as a storage medium is a flash file system. In order to design a higher-performance flash file system, two issues should be considered carefully. One issue is the design of an efficient index structure that contains the locations of both files and data in the flash memory. For large-capacity storage, the index structure must be stored in the flash memory to realize low memory consumption; however, this may degrade the system performance. The other issue is the design of a novel garbage collection (GC) scheme that reclaims obsolete pages. 
This scheme can induce considerable additional read and write operations while identifying and migrating valid pages. In this article, we present a novel flash file system that has the following features: (i) a lightweight index structure that introduces the hybrid indexing scheme and intra-inode index logging, and (ii) an efficient GC scheme that adopts a dirty list with an on-demand GC approach as well as fine-grained data separation and erase-unit data allocation. We implemented FlashLight in a Linux OS with kernel version 2.6.21 on an embedded device. The experimental results obtained using several benchmark programs confirm that FlashLight improves the performance by up to 27.4\% over UBIFS by alleviating index management and GC overheads by up to 33.8\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "18", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Eriksson:2012:ICG, author = "Mattias Eriksson and Christoph Kessler", title = "Integrated Code Generation for Loops", journal = j-TECS, volume = "11S", number = "1", pages = "19:1--19:??", year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2180887.2180896", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 7 16:18:52 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Code generation in a compiler is commonly divided into several phases: instruction selection, scheduling, register allocation, spill code generation, and, in the case of clustered architectures, cluster assignment. These phases are interdependent; for instance, a decision in the instruction selection phase affects how an operation can be scheduled. We examine the effect of this separation of phases on the quality of the generated code. 
To study this we have formulated optimal methods for code generation with integer linear programming; first for acyclic code and then we extend this method to modulo scheduling of loops. In our experiments we compare optimal modulo scheduling, where all phases are integrated, to modulo scheduling, where instruction selection and cluster assignment are done in a separate phase. The results show that, for an architecture with two clusters, the integrated method finds a better solution than the nonintegrated method for 27\% of the instances.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "19", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Murray:2012:ASL, author = "Alastair Murray and Bj{\"o}rn Franke", title = "Adaptive Source-Level Data Assignment to Dual Memory Banks", journal = j-TECS, volume = "11S", number = "1", pages = "20:1--20:??", year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2180887.2180897", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 7 16:18:52 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Dual memory banks provide extra memory bandwidth to DSP applications and enable simultaneous access to two operands if the data is partitioned appropriately. Fully automated and compiler integrated approaches to data partitioning and memory bank assignment have, however, found little acceptance by DSP software developers. In this article we present a novel source-level approach that is more programmer friendly. Our scheme is based on soft graph coloring and highly adaptive heuristics generated by genetic programming. We have evaluated our scheme on an Analog Devices TigerSHARC TS-101 DSP and achieved speedups of up to 57\% on 13 UTDSP benchmarks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "20", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Boissinot:2012:SPR, author = "Benoit Boissinot and Philip Brisk and Alain Darte and Fabrice Rastello", title = "{SSI} Properties Revisited", journal = j-TECS, volume = "11S", number = "1", pages = "21:1--21:??", year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2180887.2180898", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 7 16:18:52 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The static single information (SSI) form is an extension of the static single assignment (SSA) form, a well-established compiler intermediate representation that has been successfully used for numerous compiler analysis and optimizations. Several interesting results have also been shown for SSI form concerning liveness analysis and the representation of live-ranges of variables, which could make SSI form appealing for just-in-time compilation. Unfortunately, we have uncovered several mistakes in the previous literature on SSI form, which, admittedly, is already quite sparse. This article corrects the mistakes that are most germane to SSI form. We first explain why the two definitions of SSI form proposed in past literature, first by C. S. Ananian, then by J. Singer, are not equivalent. Our main result is then to prove that basic blocks, and thus program points, can be totally ordered so that live-ranges of variables correspond to intervals on a line, a result that holds for both variants of SSI form. In other words, in SSI form, the intersection graph defined by live-ranges is an interval graph, a stronger structural property than for SSA form for which the intersection graph of live-ranges is chordal. 
Finally, we show how this structure of live-ranges can be used to simplify liveness analysis.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "21", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Franke:2012:SPM, author = "Bj{\"o}rn Franke", title = "Statistical Performance Modeling in Functional Instruction Set Simulators", journal = j-TECS, volume = "11S", number = "1", pages = "22:1--22:??", year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2180887.2180899", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 7 16:18:52 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Despite the recent progress in improving the speed of instruction-accurate simulators, cycle-accurate simulation is still prohibitively slow for all but the most basic programs. In this article we present a statistical machine learning approach to performance estimation in fast, instruction accurate simulators and evaluate our methodology comprehensively against three popular embedded RISC processors and about 300 embedded applications. We show that our methodology is capable of providing accurate performance estimations with an average error of less than 3.9\% while, on average, operating $ \approx 14.5 $ times faster than cycle-accurate simulation.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "22", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chandraiah:2012:CAR, author = "Pramod Chandraiah and Rainer D{\"o}mer", title = "Computer-Aided Recoding to Create Structured and Analyzable System Models", journal = j-TECS, volume = "11S", number = "1", pages = "23:1--23:??", year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2180887.2180900", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 7 16:18:52 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In embedded system design, the quality of the input model has a direct bearing on the effectiveness of the system exploration and synthesis tools. Given a well-written system model, tools today are effective in generating working implementations. However, readily available C reference code is not conducive for immediate system synthesis as it lacks needed features for automatic analysis and synthesis. Among others, the lack of proper structure and the presence of intractable pointers in the reference code are factors that seriously hamper the effectiveness of system design tools. To overcome these deficiencies, we aim to automate the conversion of flat C code into a well-structured system model by applying automated source code transformations. We present a set of computer-aided recoding operations that enable the system designer to mitigate pointer problems and quickly create the necessary structural hierarchy so that the design model becomes easily analyzable and synthesizable. Utilizing the designer's knowledge, our interactive recoding transformations aid the designer in efficiently creating well-structured system models for rapid design space exploration and successful synthesis. 
Our estimated and measured experimental results show significant productivity gains through a substantial reduction of the model creation time.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "23", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Dubach:2012:EPE, author = "Christophe Dubach and Timothy M. Jones and Michael F. P. O'Boyle", title = "Exploring and Predicting the Effects of Microarchitectural Parameters and Compiler Optimizations on Performance and Energy", journal = j-TECS, volume = "11S", number = "1", pages = "24:1--24:??", year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2180887.2180901", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 7 16:18:52 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Embedded processor performance is dependent on both the underlying architecture and the compiler optimizations applied. However, designing both simultaneously is extremely difficult to achieve due to the time constraints designers must work under. Therefore, current methodology involves designing compiler and architecture in isolation, leading to suboptimal performance of the final product. This article develops a novel approach to this codesign space problem. For our specific design space, we demonstrate that we can automatically predict the performance that an optimizing compiler would achieve without actually tuning it for any of the microarchitecture configurations considered. Once trained, a single run of the program compiled with the standard optimization setting is enough to make a prediction on the new microarchitecture with just a 3.2\% error rate on average. This allows the designer to accurately choose an architectural configuration with knowledge of how an optimizing compiler will perform on it. 
We use this to find the best optimizing compiler/architectural configuration in our codesign space and demonstrate that it achieves an average 19\% performance improvement and energy savings of 16\% compared to the baseline, nearly doubling the energy-efficiency measured as the energy-delay-squared product (EDD).", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "24", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Staff:2012:APA, author = "{TECS Staff}", title = "Abstracts of Papers to appear in {Special Supplemental Issue of TECS (v11, iSupplemental1)}", journal = j-TECS, volume = "11", number = "2", pages = "25:1--25:??", month = jul, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2220336.2220337", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Jul 27 18:57:33 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In order to speed up the publication process, we have begun to publish supplemental online-only issues. The following abstracts describe the articles in the first such issue, Vol. 11S(1). These articles are available in the Digital Library.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "25", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lee:2012:PPI, author = "Jongeun Lee and Aviral Shrivastava", title = "{PICA}: {Processor Idle Cycle Aggregation} for Energy-Efficient Embedded Systems", journal = j-TECS, volume = "11", number = "2", pages = "26:1--26:??", month = jul, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2220336.2220338", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Jul 27 18:57:33 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Processor Idle Cycle Aggregation (PICA) is a promising approach for low-power execution of processors, in which small memory stalls are aggregated to create large ones, enabling profitable switch of the processor into low-power mode. We extend the previous approach in three dimensions. First we develop static analysis for the PICA technique and present optimal parameters for five common types of loops based on steady-state analysis. Second, to remedy the weakness of software-only control in varying environment, we enhance PICA with minimal hardware extension that ensures correct execution for any loops and parameters, thus greatly facilitating exploration-based parameter tuning. Third, we demonstrate that our PICA technique can be applied to certain types of nested loops with variable bounds, thus enhancing the applicability of PICA. We validate our analytical model against simulation-based optimization and also show, through our experiments on embedded application benchmarks, that our technique can be applied to a wide range of loops with average 20\% energy reductions, compared to executions without PICA.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "26", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{McIntire:2012:EES, author = "Dustin McIntire and Thanos Stathopoulos and Sasank Reddy and Thomas Schmidt and William J. Kaiser", title = "Energy-Efficient Sensing with the {Low Power, Energy Aware Processing} ({LEAP}) Architecture", journal = j-TECS, volume = "11", number = "2", pages = "27:1--27:??", month = jul, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2220336.2220339", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Jul 27 18:57:33 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "A broad range of embedded networked sensing (ENS) applications have appeared for large-scale systems, introducing new requirements leading to new embedded architectures, associated algorithms, and supporting software systems. These new requirements include the need for diverse and complex sensor systems that present demands for energy and computational resources, as well as for broadband communication. To satisfy application demands while maintaining critical support for low-energy operation, a new multiprocessor node hardware and software architecture, Low Power Energy Aware Processing (LEAP), has been developed. In this article, we described the LEAP design approach, in which the system is able to adaptively select the most energy-efficient hardware components matching an application's needs. The LEAP platform supports highly dynamic requirements in sensing fidelity, computational load, storage media, and network bandwidth. It focuses on episodic operation of each component and considers the energy dissipation for each platform task by integrating fine-grained energy-dissipation monitoring and sophisticated power-control scheduling for all subsystems, including sensors. 
In addition to the LEAP platform's unique hardware capabilities, its software architecture has been designed to provide an easy way to use power management interface and a robust, fault-tolerant operating environment and to enable remote upgrade of all software components. LEAP platform capabilities are demonstrated by example implementations, such as a network protocol design and a light source event detection algorithm. Through the use of a distributed node testbed, we demonstrate that by exploiting high energy-efficiency components and enabling proper on-demand scheduling, the LEAP architecture may meet both sensing performance and energy dissipation objectives for a broad class of applications.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "27", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wang:2012:DCR, author = "Weixun Wang and Prabhat Mishra and Ann Gordon-Ross", title = "Dynamic Cache Reconfiguration for Soft Real-Time Systems", journal = j-TECS, volume = "11", number = "2", pages = "28:1--28:??", month = jul, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2220336.2220340", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Jul 27 18:57:33 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In recent years, efficient dynamic reconfiguration techniques have been widely employed for system optimization. Dynamic cache reconfiguration is a promising approach for reducing energy consumption as well as for improving overall system performance. It is a major challenge to introduce cache reconfiguration into real-time multitasking systems, since dynamic analysis may adversely affect tasks with timing constraints. 
This article presents a novel approach for implementing cache reconfiguration in soft real-time systems by efficiently leveraging static analysis during runtime to minimize energy while maintaining the same service level. To the best of our knowledge, this is the first attempt to integrate dynamic cache reconfiguration in real-time scheduling techniques. Our experimental results using a wide variety of applications have demonstrated that our approach can significantly reduce the cache energy consumption in soft real-time systems (up to 74\%).", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "28", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Palermo:2012:VAR, author = "Gianluca Palermo and Cristina Silvano and Vittorio Zaccaria", title = "A Variability-Aware Robust Design Space Exploration Methodology for On-Chip Multiprocessors Subject to Application-Specific Constraints", journal = j-TECS, volume = "11", number = "2", pages = "29:1--29:??", month = jul, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2220336.2220341", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Jul 27 18:57:33 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Manufacturing process variation is dramatically becoming one of the most important challenges related to power and performance optimization for sub-90nm CMOS technologies. Process variability impacts the optimization of the target system metrics, that is, performance and energy consumption by introducing fluctuations and unpredictability. Besides, it impacts the parametric yield of the chip with respect to application level constraints by reducing the number of devices working within normal operating conditions. 
The impact of variability on systems with stringent application-specific requirements (such as portable multimedia and critical embedded systems) is much greater than on general-purpose systems given the emphasis on predictability and reduced operating margins. In this market segment, failing to address such a problem within the early design stages of the chip may lead to missing market deadlines and suffering greater economic losses. In the context of a design space exploration framework for supporting the platform-based design approach, we address the problem of robustness with respect to manufacturing process variations. First, we apply Response Surface Modeling (RSM) techniques to enable an efficient evaluation of the statistical measures of execution time and energy consumption for each system configuration. Then, we apply a robust design space exploration framework to afford the problem of the impact of manufacturing process variations onto the system-level metrics and consequently onto the application-level constraints. We finally provide a comparison of our design space exploration technique with conventional approaches on two different case studies.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "29", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Yang:2012:UEP, author = "Yoon Seok Yang and Gwan Choi", title = "Unequal Error Protection Based on {DVFS} for {JSCD} in Low-Power Portable Multimedia Systems", journal = j-TECS, volume = "11", number = "2", pages = "30:1--30:??", month = jul, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2220336.2220342", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Jul 27 18:57:33 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article presents a low-power decoder design for joint source-channel decoding (JSCD) based on a novel unequal error protection (UEP) scheme over additive white Gaussian noise (AWGN) channels. Conventional JSCD schemes, adopting low-density parity check (LDPC) codes for multimedia devices, typically operate at a fixed-time decoding loop, regardless of the quality of data received. We present a JSCD scheme that achieves reduction in power through minimum energy decoding and dynamic voltage and frequency scaling (DVFS). Consequently, up to 39\% power reduction is achieved in Foreman, Akiyo, and Mobile video streams without performance degradation in reconstructed video quality.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "30", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Namin:2012:EFF, author = "Ashkan Hosseinzadeh Namin and Huapeng Wu and Majid Ahmadi", title = "An Efficient Finite Field Multiplier Using Redundant Representation", journal = j-TECS, volume = "11", number = "2", pages = "31:1--31:??", month = jul, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2220336.2220343", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Jul 27 18:57:33 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "An efficient word-level finite field multiplier using redundant representation is proposed. The proposed multiplier has a significantly higher speed, compared to previously proposed word-level architectures using either redundant representation or optimal normal basis type I, at the expense of moderately higher area complexity. Furthermore, the new design out-performs other similar proposals when considering the product of area and delay as a measure of performance. ASIC Realization of the proposed design using TSMC's 0.18 $ \mu $ m CMOS technology for the binary field size of 163 is also presented.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "31", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Leyva-del-Foyo:2012:ITI, author = "Luis E. 
Leyva-del-Foyo and Pedro Mejia-Alvarez and Dionisio de Niz", title = "Integrated Task and Interrupt Management for Real-Time Systems", journal = j-TECS, volume = "11", number = "2", pages = "32:1--32:??", month = jul, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2220336.2220344", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Jul 27 18:57:33 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Real-time scheduling algorithms like RMA or EDF and their corresponding schedulability test have proven to be powerful tools for developing predictable real-time systems. However, the traditional interrupt management model presents multiple inconsistencies that break the assumptions of many of the real-time scheduling tests, diminishing its utility. In this article, we analyze these inconsistencies and present a model that resolves them by integrating interrupts and tasks in a single scheduling model. We then use the RMA theory to calculate the cost of the model and analyze the circumstances under which it can provide the most value. This model was implemented in a kernel module. The portability of the design of our module is discussed in terms of its independence from both the hardware and the kernel. We also discuss the implementation issues of the model over conventional PC hardware, along with its cost and novel optimizations for reducing the overhead. Finally, we present our experimental evaluation to show evidence of its temporal determinism and overhead.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "32", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Garg:2012:IMP, author = "Siddharth Garg and Diana Marculescu", title = "On the Impact of Manufacturing Process Variations on the Lifetime of Sensor Networks", journal = j-TECS, volume = "11", number = "2", pages = "33:1--33:??", month = jul, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2220336.2220345", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Jul 27 18:57:33 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The lifetime of individual nodes in a sensor network depends strongly on the leakage power of the nodes in idle state. With technology scaling, variability in leakage power dissipation of sensor nodes will cause increased variability in their lifetimes. In this article, we analyze how the lifetime variations of sensor nodes affect the performance of the sensor network as a whole. We demonstrate the use of the proposed framework to explore deployment cost versus performance trade-offs for sensor networks. Results indicate that up to 37\% improvement in the critical lifetime of a sensor network can be obtained with a 20\% increase in deployment cost.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "33", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Blech:2012:GIB, author = "Jan Olaf Blech and Micha{\"e}l P{\'e}rin", title = "Generating Invariant-Based Certificates for Embedded Systems", journal = j-TECS, volume = "11", number = "2", pages = "34:1--34:??", month = jul, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2220336.2220346", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Jul 27 18:57:33 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Automatic verification tools, such as model checkers and tools based on static analysis or on abstract interpretation, have become popular in software and hardware development. They increase confidence and potentially provide rich feedback. However, with increasing complexity, verification tools themselves are more likely to contain errors. In contrast to automatic verification tools, higher-order theorem provers use mathematically founded proof strategies checked by a small proof checker to guarantee selected properties. Thus, they enjoy a high level of trustability. Properties of software and hardware systems and their justifications can be encapsulated into a certificate, thereby guaranteeing correctness of the systems, with respect to the properties. These results offer a much higher degree of confidence than results achieved by verification tools. However, higher-order theorem provers are usually slow, due to their general and minimalistic nature. Even for small systems, a lot of human interaction is required for establishing a certificate. In this work, we combine the advantages of automatic verification tools (i.e., speed and automation) with those of higher-order theorem provers (i.e., high level of trustability). The verification tool generates a certificate for each invocation. 
This is checked by the higher-order theorem prover, thereby guaranteeing the desired property. The generation of certificates is much easier than producing the analysis results of the verification tool in the first place. In our work, we are able to create certificates that come with an algorithmic description of the proof of the desired property as justification. We concentrate on verification tools that generate invariants of systems and certify automatically that these do indeed hold. Our approach is applied to the certification of the verdicts of a deadlock-detection tool for an asynchronous component-based language.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "34", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Jeong:2012:PLT, author = "Jaein Jeong and David Culler", title = "Predicting the Long-Term Behavior of a Micro-Solar Power System", journal = j-TECS, volume = "11", number = "2", pages = "35:1--35:??", month = jul, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2220336.2220347", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Jul 27 18:57:33 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Micro-solar power system design is challenging because it must address long-term system behavior under highly variable solar energy conditions and consider a large space of design options. Several micro-solar power systems and models have been made, validating particular points in the whole design space. We provide a general architecture of micro-solar power systems---comprising key components and interconnections among the components---and formalize each component in an analytical or empirical model of its behavior. 
To model the variability of solar energy, we provide three solar radiation models, depending on the degree of information available: an astronomical model for ideal conditions, an obstructed astronomical model for estimating solar radiation under the presence of shadows and obstructions, and a weather-effect model for estimating solar radiation under weather variation. Our solar radiation models are validated with a concrete design, the HydroWatch node, thus achieving small deviation from the long-term measurement. They can be used in combination with other micro-solar system models to improve the utility of the load and estimate the behavior of micro-solar power systems more accurately. Thus, our solar radiation models provide more accurate estimations of solar radiation and close the loop for micro-solar power system modeling.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "35", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Smith:2012:OSH, author = "Melissa C. Smith and Gregory D. Peterson", title = "Optimization of Shared High-Performance Reconfigurable Computing Resources", journal = j-TECS, volume = "11", number = "2", pages = "36:1--36:??", month = jul, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2220336.2220348", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Jul 27 18:57:33 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In the field of high-performance computing, systems harboring reconfigurable devices, such as field-programmable gate arrays (FPGAs), are gaining more widespread interest. Such systems range from supercomputers with tightly coupled reconfigurable hardware to clusters with reconfigurable devices at each node. 
The use of these architectures for scientific computing provides an alternative for computationally demanding problems and has advantages in metrics, such as operating cost/performance and power/performance. However, performance optimization of these systems can be challenging even with knowledge of the system's characteristics. Our analytic performance model includes parameters representing the reconfigurable hardware, application load imbalance across the nodes, background user load, basic message-passing communication, and processor heterogeneity. In this article, we provide an overview of the analytical model and demonstrate its application for optimization and scheduling of high-performance reconfigurable computing (HPRC) resources. We examine cost functions for minimum runtime and other optimization problems commonly found in shared computing resources. Finally, we discuss additional scheduling issues and other potential applications of the model.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "36", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lee:2012:EEA, author = "Kyoungwoo Lee and Nikil Dutt and Nalini Venkatasubramanian", title = "{EAVE}: {Error-Aware Video Encoding} Supporting Extended Energy\slash {QoS} Trade-offs for Mobile Embedded Systems", journal = j-TECS, volume = "11", number = "2", pages = "37:1--37:??", month = jul, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2220336.2220349", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Jul 27 18:57:33 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Energy/QoS provisioning is challenging for video applications over lossy wireless network with power-constrained mobile handheld devices. 
In this work, we exploit the inherent error tolerance of video data to generate a range of acceptable operating points by controlling the amount of errors in the system. In particular, we propose an error-aware video encoding technique, EAVE, that intentionally injects errors while ensuring acceptable QoS. The expanded trade-off space generated by EAVE allows system designers to comparatively evaluate different operating points with varying QoS and energy consumption by aggressively exploiting error-resilience attributes, and could potentially result in significant energy savings. The novelty of our approach resides in active exploitation of errors to vary the operating conditions for further optimization of system parameters. Moreover, we present the adaptivity of our approach by incorporating the feedback from the decoding side to achieve the QoS requirement under the dynamic network status. Our experiments show that EAVE can reduce the energy consumption for an encoding device by up to 37\% for a video conferencing application over a wireless network without quality degradation, compared to a standard video encoding technique over test video streams. Further, our experimental results demonstrate that EAVE can expand the design space by 14 times with respect to energy consumption and by 13 times with respect to video quality (compared to a traditional approach without active error exploitation) on average, over test video streams.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "37", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chen:2012:ART, author = "Mingsong Chen and Prabhat Mishra and Dhrubajyoti Kalita", title = "Automatic {RTL} Test Generation from {SystemC TLM} Specifications", journal = j-TECS, volume = "11", number = "2", pages = "38:1--38:??", month = jul, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2220336.2220350", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Jul 27 18:57:33 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "SystemC transaction-level modeling (TLM) is widely used to enable early exploration for both hardware and software designs. It can reduce the overall design and validation effort of complex system-on-chip (SOC) architectures. However, due to lack of automated techniques coupled with limited reuse of validation efforts between abstraction levels, SOC validation is becoming a major bottleneck. This article presents a novel top-down methodology for automatically generating register transfer-level (RTL) tests from SystemC TLM specifications. It makes two important contributions: (i) it proposes a method that can automatically generate TLM tests using various coverage metrics, and (ii) it develops a test refinement specification for automatically converting TLM tests to RTL tests in order to reduce overall validation effort. We have developed a tool which incorporates these activities to enable automated RTL test generation from SystemC TLM specifications. Case studies using a router example and a 64-bit Alpha AXP pipelined processor demonstrate that our approach can achieve intended functional coverage of the RTL designs, as well as capture various functional errors and inconsistencies between specifications and implementations.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "38", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Plaks:2012:ESS, author = "Toomas P. Plaks", title = "Editorial: Special Section on {CAPA'09}", journal = j-TECS, volume = "11", number = "S2", pages = "39:1--39:??", year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2331147.2331148", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Sep 6 09:57:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "39", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Paul:2012:PRC, author = "Anand Paul and Yung-Chuan Jiang and Jhing-Fa Wang and Jar-Ferr Yang", title = "Parallel Reconfigurable Computing-Based Mapping Algorithm for Motion Estimation in Advanced Video Coding", journal = j-TECS, volume = "11", number = "S2", pages = "40:1--40:??", year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2331147.2331149", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Sep 6 09:57:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Computational load of motion estimation in advanced video coding (AVC) standard is significantly high and even worse for HDTV and super-resolution sequences. In this article, a video processing algorithm is dynamically mapped onto a new parallel reconfigurable computing (PRC) architecture which consists of multiple dynamic reconfigurable computing (DRC) units. First, we construct a directed acyclic graph (DAG) to represent video coding algorithms in which motion estimation is the focus. A novel parallel partition approach is then proposed to map motion estimation DAG onto the multiple DRC units in a PRC system. 
This partitioning algorithm is capable of design optimization of parallel processing reconfigurable systems for a given number of processing elements in different search ranges. This speeds up the video processing with minimum sacrifice.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "40", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Suris:2012:RSC, author = "Jorge A. Sur{\'\i}s and Adolfo Recio and Peter Athanas", title = "{RapidRadio}: Signal Classification and Radio Deployment Framework", journal = j-TECS, volume = "11", number = "S2", pages = "41:1--41:??", year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2331147.2331151", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Sep 6 09:57:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this article, the RapidRadio framework for signal classification and receiver deployment is discussed. The framework is a productivity-enhancing tool that reduces the required knowledge base for implementing a receiver on an FPGA-based SDR platform. The ultimate objective of this framework is to identify unknown signals and to build FPGA-based receivers capable of receiving them. RapidRadio divides the process of radio creation into two phases: the analysis phase and radio synthesis phase. The analysis phase guides the user through the process of classifying an unknown signal and determining its modulation scheme and parameters, resulting in a radio receiver model. In the second phase, this model is transformed into a functional receiver in an FPGA-based platform.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "41", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mark:2012:HBC, author = "Cindy Mark and Scott Y. L. Chin and Lesley Shannon and Steven J. E. Wilton", title = "Hierarchical Benchmark Circuit Generation for {FPGA} Architecture Evaluation", journal = j-TECS, volume = "11", number = "S2", pages = "42:1--42:??", year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2331147.2331152", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Sep 6 09:57:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We describe a stochastic circuit generator that can be used to automatically create benchmark circuits for use in FPGA architecture studies. The circuits consist of a hierarchy of interconnected modules, reflecting the structure of circuits designed using a system-on-chip design flow. Within each level of hierarchy, modules can be connected in a bus, star, or dataflow configuration. Our circuit generator is calibrated based on a careful study of existing system-on-chip circuits. We show that our benchmark circuits lead to more realistic architectural conclusions than circuits generated using previous generators.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "42", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Reardon:2012:REE, author = "Casey Reardon and Brian Holland and Alan D. 
George and Greg Stitt and Herman Lam", title = "{RCML}: An Environment for Estimation Modeling of Reconfigurable Computing Systems", journal = j-TECS, volume = "11", number = "S2", pages = "43:1--43:??", year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2331147.2331153", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Sep 6 09:57:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Reconfigurable computing (RC) is emerging as a promising area for embedded computing, in which complex systems must balance performance, flexibility, cost, and power. The difficulty associated with RC development suggests improved strategic planning and analysis techniques can save significant development time and effort. This article presents a new abstract modeling language and environment, the RC Modeling Language (RCML), to facilitate efficient design space exploration of RC systems at the estimation modeling level, that is, before building a functional implementation. Two integrated analysis tools and case studies, one analytical and one simulative, are presented illustrating relatively accurate automated analysis of systems modeled in RCML.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "43", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{DiBiagio:2012:AOA, author = "Andrea {Di Biagio} and Giovanni Agosta and Martino Sykora and Cristina Silvano", title = "Architecture Optimization of Application-Specific Implicit Instructions", journal = j-TECS, volume = "11", number = "S2", pages = "44:1--44:??", year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2331147.2331154", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Sep 6 09:57:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Dynamic configuration of application-specific implicit instructions has been proposed to better exploit the available parallelism at the instruction level in pipelined processors. The support of such implicit instruction issue requires the pipeline to be extended with a trigger table that describes the instruction implicitly issued as a response to a value written into a triggering register by a triggering instruction (which may be an add or sub instruction). In this article, we explore the design optimization of the trigger table to maximize the number of instructions that can be implicitly issued while keeping the limited size of the trigger table. The concept of implicitly issued instruction has been formally defined by considering the inter-basic block analysis of control and data dependencies. A compilation tool chain has been developed to automatically identify the optimization opportunities, taking into account the constraints imposed by control and data dependencies as well as by architectural limitations. The proposed solutions have been applied to the case of a baseline scalar MIPS processor where, for the selected set of benchmarks (DSPStone and Mibench/automotive), we obtained an average speedup of 17\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed.
Comput. Syst.", articleno = "44", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Napapetian:2012:ESS, author = "Ani Nahapetian and William Kaiser and Majid Sarrafzadeh", title = "Editorial: Special Section on {WHS'09}", journal = j-TECS, volume = "11", number = "S2", pages = "45:1--45:??", year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2331147.2331155", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Sep 6 09:57:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "45", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Guenterberg:2012:ASR, author = "Eric Guenterberg and Hassan Ghasemzadeh and Roozbeh Jafari", title = "Automatic Segmentation and Recognition in Body Sensor Networks Using a Hidden {Markov} Model", journal = j-TECS, volume = "11", number = "S2", pages = "46:1--46:??", year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2331147.2331156", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Sep 6 09:57:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "One important application of body sensor networks is action recognition. Action recognition often implicitly requires partitioning sensor data into intervals, then labeling the partitions according to the action that each represents or as a non-action. The temporal partitioning stage is called segmentation, and the labeling is called classification. While many effective methods exist for classification, segmentation remains problematic. We present a technique inspired by continuous speech recognition that combines segmentation and classification using hidden Markov models.
This technique is distributed across several sensor nodes. We show the results of this technique and the bandwidth savings over full data transmission.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "46", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Pradhan:2012:AVJ, author = "Gaurav N. Pradhan and B. Prabhakaran", title = "Analyzing and Visualizing Jump Performance Using Wireless Body Sensors", journal = j-TECS, volume = "11", number = "S2", pages = "47:1--47:??", year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2331147.2331157", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Sep 6 09:57:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Advancement in technology has led to the deployment of body sensor networks (BSN) to monitor and sense human activity in pervasive environments. Using multiple wireless on-body systems, such as physiological data monitoring and motion capture systems, body sensor network data consists of heterogeneous physiologic and motoric streams that form a multidimensional framework. In this article, we analyze such high-dimensional body sensor network data by proposing an efficient, multidimensional factor analysis technique for quantifying human performance and, at the same time, providing visualization for performances of participants in a low-dimensional space for easier interpretation.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "47", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Thatte:2012:KEE, author = "Gautam Thatte and Ming Li and Sangwon Lee and Adar Emken and Shrikanth Narayanan and Urbashi Mitra and Donna Spruijt-Metz and Murali Annavaram", title = "{KNOWME}: An Energy-Efficient Multimodal Body Area Network for Physical Activity Monitoring", journal = j-TECS, volume = "11", number = "S2", pages = "48:1--48:??", year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2331147.2331158", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Sep 6 09:57:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The use of biometric sensors for monitoring an individual's health and related behaviors, continuously and in real time, promises to revolutionize healthcare in the near future. In an effort to better understand the complex interplay between one's medical condition and social, environmental, and metabolic parameters, this article presents the KNOWME platform, a complete, end-to-end, body area sensing system that integrates off-the-shelf biometric sensors with a Nokia N95 mobile phone to continuously monitor the metabolic signals of a subject. With a current focus on pediatric obesity, KNOWME employs metabolic signals to monitor and evaluate physical activity. KNOWME development and in-lab deployment studies have revealed three major challenges: (1) the need for robustness to highly varying operating environments due to subject-induced variability, such as mobility or sensor placement; (2) balancing the tension between achieving high fidelity data collection and minimizing network energy consumption; and (3) accurate physical activity detection using a modest number of sensors. The KNOWME platform described herein directly addresses these three challenges. 
Design robustness is achieved by creating a three-tiered sensor data collection architecture. The system architecture is designed to provide robust, continuous, multichannel data collection and scales without compromising normal mobile device operation. Novel physical activity detection methods which exploit new representations of sensor signals provide accurate and efficient physical activity detection. The physical activity detection method employs personalized training phases and accounts for intersession variability. Finally, exploiting the features of the hardware implementation, a low-complexity sensor sampling algorithm is developed, resulting in significant energy savings without loss of performance.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "48", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Banerjee:2012:BAT, author = "Ayan Banerjee and Sailesh Kandula and Tridib Mukherjee and Sandeep K. S. Gupta", title = "{BAND-AiDe}: a Tool for Cyber-Physical Oriented Analysis and Design of Body Area Networks and Devices", journal = j-TECS, volume = "11", number = "S2", pages = "49:1--49:??", year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2331147.2331159", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Sep 6 09:57:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Body area networks (BANs) are networks of medical devices implanted within or worn on the human body. Analysis and verification of BAN designs require (i) early feedback on the BAN design and (ii) high-confidence evaluation of BANs without requiring any hazardous, intrusive, and costly deployment. 
Any design of BAN further has to ensure (i) the safety of the human body, that is, limiting any undesirable side-effects (e.g., heat dissipation) of BAN operations (involving sensing, computation, and communication among the devices) on the human body, and (ii) the sustainability of the BAN operations, that is, the continuation of the operations under constrained resources (e.g., limited battery power in the devices) without requiring any redeployments. This article uses the Model Based Engineering (MBE) approach to perform design and analysis of BANs. In this regard, first, an abstract cyber-physical model of BANs, called BAN-CPS, is proposed that captures the undesirable side-effects of the medical devices (cyber) on the human body (physical); second, a design and analysis tool, named BAND-AiDe, is developed that allows specification of BAN-CPS using industry standard Abstract Architecture Description Language (AADL) and enables safety and sustainability analysis of BANs; and third, the applicability of BAND-AiDe is shown through a case study using both single and a network of medical devices for health monitoring applications.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "49", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hanson:2012:AFE, author = "Mark A. Hanson and Harry C. {Powell, Jr.} and Adam T. 
Barth and John Lach", title = "Application-Focused Energy-Fidelity Scalability for Wireless Motion-Based Health Assessment", journal = j-TECS, volume = "11", number = "S2", pages = "50:1--50:??", year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2331147.2331160", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Sep 6 09:57:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Energy-fidelity trade-offs are central to the performance of many technologies, but they are essential in wireless body area sensor networks (BASNs) due to severe energy and processing constraints and the critical nature of certain healthcare applications. On-node signal processing and compression techniques can save energy by greatly reducing the amount of data transmitted over the wireless channel, but lossy techniques, capable of high compression ratios, can incur a reduction in application fidelity. In order to maximize system performance, these trade-offs must be considered at runtime due to the dynamic nature of BASN applications, including sensed data, operating environments, user actuation, etc. BASNs therefore require energy-fidelity scalability, so automated and user-initiated trade-offs can be made dynamically. This article presents a data rate scalability framework within a motion-based health application context which demonstrates the design of efficient and efficacious wireless health systems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "50", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Boulis:2012:IWC, author = "Athanassios Boulis and Yuriy Tselishchev and Lavy Libman and David Smith and Leif Hanlen", title = "Impact of Wireless Channel Temporal Variation on {MAC} Design for Body Area Networks", journal = j-TECS, volume = "11", number = "S2", pages = "51:1--51:??", year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2331147.2331161", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Sep 6 09:57:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We investigate the impact of wireless channel temporal variations on the design of medium access control (MAC) protocols for body area networks (BANs). Our measurements-based channel model captures large and small time-scale signal correlations, giving an accurate picture of the signal variation, specifically, the deep fades which are the features that mostly affect the behavior of the MAC. We test the effect of the channel model on the performance of the 802.15.4 MAC both in contention access mode and TDMA access mode. We show that there are considerable differences in the performance of the MAC compared to simulations that do not model channel temporal variation. Furthermore, explaining the behavior of the MAC under a temporal varying channel, we can suggest specific design choices for the emerging BAN MAC standard.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "51", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Fainekos:2012:ESS, author = "Georgios Fainekos and Eric Goubault and Franjo Ivan{\v{c}}i{\'c} and Sriram Sankaranarayanan", title = "Editorial: Special Section {VCPSS'09}", journal = j-TECS, volume = "11", number = "S2", pages = "52:1--52:??", year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2331147.2331162", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Sep 6 09:57:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "52", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wongpiromsarn:2012:VPC, author = "Tichakorn Wongpiromsarn and Sayan Mitra and Andrew Lamperski and Richard M. Murray", title = "Verification of Periodically Controlled Hybrid Systems: Application to an Autonomous Vehicle", journal = j-TECS, volume = "11", number = "S2", pages = "53:1--53:??", year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2331147.2331163", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Sep 6 09:57:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article introduces Periodically Controlled Hybrid Automata (PCHA) for modular specification of embedded control systems. In a PCHA, control actions that change the control input to the plant occur roughly periodically, while other actions that update the state of the controller may occur in the interim. Such actions could model, for example, sensor updates and information received from higher-level planning modules that change the set point of the controller.
Based on periodicity and subtangential conditions, a new sufficient condition for verifying invariant properties of PCHAs is presented. For PCHAs with polynomial continuous vector fields, it is possible to check these conditions automatically using, for example, quantifier elimination or sum of squares decomposition. We examine the feasibility of this automatic approach on a small example. The proposed technique is also used to manually verify safety and progress properties of a fairly complex planner-controller subsystem of an autonomous ground vehicle. Geometric properties of planner-generated paths are derived which guarantee that such paths can be safely followed by the controller.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "53", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Girard:2012:VSL, author = "Antoine Girard and Gang Zheng", title = "Verification of Safety and Liveness Properties of Metric Transition Systems", journal = j-TECS, volume = "11", number = "S2", pages = "54:1--54:??", year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2331147.2331164", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Sep 6 09:57:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We consider verification problems for transition systems enriched with a metric structure. We believe that these metric transition systems are particularly suitable for the analysis of cyber-physical systems in which metrics can be naturally defined on the numerical variables of the embedded software and on the continuous states of the physical environment. We consider verification of bounded and unbounded safety properties, as well as bounded liveness properties. 
The transition systems we consider are nondeterministic, finitely branching, and with a finite set of initial states. Therefore, bounded safety/liveness properties can always be verified by exhaustive exploration of the system trajectories. However, this approach may be intractable in practice, as the number of trajectories usually grows exponentially with respect to the considered bound. Furthermore, since the system we consider can have an infinite set of states, exhaustive exploration cannot be used for unbounded safety verification. For bounded safety properties, we propose an algorithm which combines exploration of the system trajectories and state space reduction using merging based on a bisimulation metric. The main novelty compared to an algorithm presented recently by Lerda et al. [2008] consists in introducing a tuning parameter that improves the performance drastically. We also establish a procedure that allows us to prove unbounded safety from the result of the bounded safety algorithm via a refinement step. We then adapt the algorithm to handle bounded liveness verification. Finally, the effectiveness of the approach is demonstrated by applying it to the analysis of implementations of an embedded control loop.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "54", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Seshia:2012:QAS, author = "Sanjit A. 
Seshia and Alexander Rakhlin", title = "Quantitative Analysis of Systems Using Game-Theoretic Learning", journal = j-TECS, volume = "11", number = "S2", pages = "55:1--55:??", year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2331147.2331165", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Sep 6 09:57:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The analysis of quantitative properties, such as timing and power, is central to the design of reliable embedded software and systems. However, the verification of such properties on a program is made difficult by their heavy dependence on the program's environment, such as the processor it runs on. Modeling the environment by hand can be tedious, error prone, and time consuming. In this article, we present a new game-theoretic approach to analyzing quantitative properties that is based on performing systematic measurements to automatically learn a model of the environment. We model the problem as a game between our algorithm (player) and the environment of the program (adversary) in which the player seeks to accurately predict the property of interest, while the adversary sets environment states and parameters. To solve this problem, we employ a randomized strategy that repeatedly tests the program along a linear-sized set of program paths called basis paths, using the resulting measurements to infer a weighted-graph model of the environment from which quantitative properties can be predicted. Test cases are automatically generated using satisfiability modulo theories (SMT) solving. We prove that our algorithm can, under certain assumptions and with arbitrarily high probability, accurately predict properties such as worst-case execution time or estimate the distribution of execution times. 
Experimental results for execution time analysis demonstrate that our approach is efficient, accurate, and highly portable.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "55", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wu:2012:MCB, author = "Lan Wu and Wei Zhang", title = "A Model Checking Based Approach to Bounding Worst-Case Execution Time for Multicore Processors", journal = j-TECS, volume = "11", number = "S2", pages = "56:1--56:??", year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2331147.2331166", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Sep 6 09:57:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "As multicore processors are increasingly adopted in industry, it has become a great challenge to accurately bound the worst-case execution time (WCET) for real-time systems running on multicore chips. This is particularly true because of the inter-thread interferences in accessing shared resources on multicores, such as shared L2 caches, which can significantly affect the performance but are very difficult to be estimated statically. This article proposes an approach to analyzing WCET for multicore processors with shared L2 instruction caches by using a model checking based method. We model each concurrent real-time thread, including the inter-thread cache interferences with a PROMELA process, and derive the WCET by using a binary search algorithm. To reduce the state explosion problem, we propose several techniques for reducing the memory consumption by exploiting domain-specific information. Our experiments indicate that compared to the static analysis technique based on extended ILP (integer linear programming), our approach improves the tightness of WCET estimation by more than 31.1\% for the benchmarks we studied. 
However, due to the inherent complexity of multicore timing analysis and the state explosion problem, the model checking based approach currently can only work with small real-time kernels for dual-core processors.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "56", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Tang:2012:UMS, author = "Qinghui Tang and Sandeep K. S. Gupta and Georgios Varsamopoulos", title = "A Unified Methodology for Scheduling in Distributed Cyber-Physical Systems", journal = j-TECS, volume = "11", number = "S2", pages = "57:1--57:??", year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2331147.2331167", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Sep 6 09:57:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "A distributed cyber-physical system (DCPS) may receive and induce energy-based interference to and from its environment. This article presents a model and an associated methodology that can be used to (i) schedule tasks in DCPSs to ensure that the thermal effects of the task execution are within acceptable levels, and (ii) verify that a given schedule meets the constraints. The model uses coarse discretization of space and linearity of interference. The methodology involves characterizing the interference of the task execution and fitting it into the model, then using the fitted model to verify a solution or explore the solution space.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "57", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Nghiem:2012:TTI, author = "Truong Nghiem and George J. 
Pappas and Rajeev Alur and Antoine Girard", title = "Time-Triggered Implementations of Dynamic Controllers", journal = j-TECS, volume = "11", number = "S2", pages = "58:1--58:??", year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2331147.2331168", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Sep 6 09:57:10 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Bridging the gap between model-based design and platform-based implementation is one of the critical challenges for embedded software systems. In the context of embedded control systems that interact with an environment, a variety of errors due to quantization, delays, and scheduling policies may generate executable code that does not faithfully implement the model-based design. In this article, we show that the performance gap between the model-level semantics of linear dynamic controllers, for example, the proportional-integral-derivative (PID) controllers and their implementation-level semantics, can be rigorously quantified if the controller implementation is executed on a predictable time-triggered architecture. Our technical approach uses lifting techniques for periodic time-varying linear systems in order to compute the exact error between the model semantics and the execution semantics. Explicitly computing the impact of the implementation on overall system performance allows us to compare and partially order different implementations with various scheduling or timing characteristics.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "58", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Dong:2012:UAS, author = "Qi Dong and Donggang Liu", title = "Using Auxiliary Sensors for Pairwise Key Establishment in {WSN}", journal = j-TECS, volume = "11", number = "3", pages = "59:1--59:??", month = sep, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2345770.2345771", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 22 10:44:19 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Many techniques have been developed recently for establishing pairwise keys in sensor networks. However, some of them are vulnerable to a few compromised sensor nodes, while others could involve expensive protocols for establishing keys. This article introduces a much better alternative that can achieve both high resilience to node compromises and high efficiency in key establishment. The main idea is to deploy a small number of additional sensor nodes, called assisting nodes, to help key establishment between sensor nodes. The proposed approach has many advantages over existing approaches. In particular, a sensor node only needs to make a few local communications and perform a few efficient hash operations to set up a key with any other sensor node in the network at a very high probability. The majority of sensor nodes only need to store a single key. Besides, it also provides high resilience to node compromises. The theoretical analysis, simulation studies, and experiments on TelosB sensor motes also demonstrate the advantages of this key establishment protocol in sensor networks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "59", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Arora:2012:ILM, author = "Divya Arora and Najwa Aaraj and Anand Raghunathan and Niraj K. Jha", title = "{INVISIOS}: a Lightweight, Minimally Intrusive Secure Execution Environment", journal = j-TECS, volume = "11", number = "3", pages = "60:1--60:??", month = sep, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2345770.2345772", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 22 10:44:19 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Many information security attacks exploit vulnerabilities in ``trusted'' and privileged software executing on the system, such as the operating system (OS). On the other hand, most security mechanisms provide no immunity to security-critical user applications if vulnerabilities are present in the underlying OS. While technologies have been proposed that facilitate isolation of security-critical software, they require either significant computational resources and are hence not applicable to many resource-constrained embedded systems, or necessitate extensive redesign of the underlying processors and hardware. In this work, we propose INVISIOS: a lightweight, minimally intrusive hardware-software architecture to make the execution of security-critical software invisible to the OS, and hence protected from its vulnerabilities. The INVISIOS software architecture encapsulates the security-critical software into a self-contained software module. While this module is part of the kernel and is run with kernel-level privileges, its code, data, and execution are transparent to and protected from the rest of the kernel. 
The INVISIOS hardware architecture consists of simple add-on hardware components that are responsible for bootstrapping the secure core, ensuring that it is exercised by applications in only permitted ways, and enforcing the isolation of its code and data. We implemented INVISIOS by enhancing a full-system emulator and Linux to model the proposed software and hardware enhancements, and applied it to protect a commercial cryptographic library. Our experiments demonstrate that INVISIOS is capable of facilitating secure execution at very small overheads, making it suitable for resource-constrained embedded systems and systems-on-chip.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "60", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Izosimov:2012:SOF, author = "Viacheslav Izosimov and Paul Pop and Petru Eles and Zebo Peng", title = "Scheduling and Optimization of Fault-Tolerant Embedded Systems with Transparency\slash Performance Trade-Offs", journal = j-TECS, volume = "11", number = "3", pages = "61:1--61:??", month = sep, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2345770.2345773", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 22 10:44:19 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this article, we propose a strategy for the synthesis of fault-tolerant schedules and for the mapping of fault-tolerant applications. Our techniques handle transparency/performance trade-offs and use the fault-occurrence information to reduce the overhead due to fault tolerance. Processes and messages are statically scheduled, and we use process reexecution for recovering from multiple transient faults. We propose a fine-grained transparent recovery, where the property of transparency can be selectively applied to processes and messages. 
Transparency hides the recovery actions in a selected part of the application so that they do not affect the schedule of other processes and messages. While leading to longer schedules, transparent recovery has the advantage of both improved debuggability and less memory needed to store the fault-tolerant schedules.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "61", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Yang:2012:PAA, author = "Shengqi Yang and Pallav Gupta and Marilyn Wolf and Dimitrios Serpanos and Vijaykrishnan Narayanan and Yuan Xie", title = "Power Analysis Attack Resistance Engineering by Dynamic Voltage and Frequency Scaling", journal = j-TECS, volume = "11", number = "3", pages = "62:1--62:??", month = sep, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2345770.2345774", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 22 10:44:19 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article proposes a novel approach to cryptosystem design to prevent power analysis attacks. Such attacks infer program behavior by continuously monitoring the power supply current going into the processor core. They form an important class of security attacks. Our approach is based on dynamic voltage and frequency scaling (DVFS), which hides processor state to make it harder for an attacker to gain access to a secure system. Three designs are studied to test the efficacy of the DVFS method against power analysis attacks. The advanced realization of our cryptosystem is presented which achieves sufficiently high power and time trace entropies to block various kinds of power analysis attacks in the DES algorithm. We observed 27\% energy reduction and 16\% time overhead in these algorithms. 
Finally, DVFS hardness analysis is presented.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "62", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Shokry:2012:HSS, author = "Hesham Shokry and Hatem M. El-Boghdadi", title = "On Heuristic Solutions to the Simple Offset Assignment Problem in Address-Code Optimization", journal = j-TECS, volume = "11", number = "3", pages = "63:1--63:??", month = sep, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2345770.2345775", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 22 10:44:19 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The increasing demand for more functionality in embedded systems applications nowadays requires efficient generation of compact code for embedded DSP processors. Because such processors have highly irregular data-paths, compilers targeting those processors are challenged with the automatic generation of optimized code with competent quality comparable to hand-crafted code. A major issue in code-generation is to optimize the placement of program variables in ROM relative to each other so as to reduce the overhead instructions dedicated for address computations. Modern DSP processors are typically shipped with a feature called Address Generation Unit (AGU) that provides efficient address-generation instructions for accessing program variables. Compilers targeting those processors are expected to exploit the AGU to optimize variables assignment. This article focuses on one of the basic offset-assignment problems; the Simple Offset Assignment (SOA) problem, where the AGU has only one Address Register and no Modify Registers. The notion of Tie-Break Function, TBF, introduced by Leupers and Marwedel [1996], has been used to guide the placement of variables in memory. 
In this article, we introduce a more effective form of the TBF; the Effective Tie-Breaking Function, ETBF, and show that the ETBF is better at guiding the variables placement process. Underpinning ETBF is the fact that program variables are placed in memory in sequence, with each variable having only two neighbors. We applied our technique to randomly generated graphs as well as to real-world code from the OffsetStone testbench [2010]. In previous work [Ali et al. 2008], our technique showed up to 7\% reduction in overhead when applied to randomly-generated problem instances. We report in this article on a further experiment of our technique on real-code from the OffsetStone testbench. Despite the substantial improvement our technique has achieved when applied to random problem instances, we found that it shows slight overhead reduction when applied to real-world instances in OffsetStone, which agrees with similar existing experiments. We analyze these results and show that the ETBF defaults to TBF.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "63", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Girodias:2012:IMO, author = "Bruno Girodias and Luiza Gheorghe Iugan and Youcef Bouchebaba and Gabriela Nicolescu and El Mostapha Abouhamid and Michel Langevin and Pierre Paulin", title = "Integrating Memory Optimization with Mapping Algorithms for Multi-Processors System-on-Chip", journal = j-TECS, volume = "11", number = "3", pages = "64:1--64:??", month = sep, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2345770.2345776", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 22 10:44:19 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Due to their great ability to parallelize at a very high integration level, Multi-Processors Systems-on-Chip (MPSoCs) are good candidates for systems and applications such as multimedia. Memory is becoming a key player for significant improvements in these applications (power, performance and area). The large amount of data manipulated by these applications requires high-capacity computing and memory. Lately, new programming models have been introduced. This leads to the need of new optimization and mapping techniques suitable for embedded systems and their programming models. This article presents novel approaches for combining memory optimization with mapping of data-driven applications while considering anti-dependence conflicts. Two different approaches are studied and integrated with existing mapping algorithms. The first approach (based on heuristic algorithms) keeps the graph transformation for memory optimization stage from the mapping stage and enables their combination in a design flow. The second approach (based on evolutionary algorithms) combines these two stages and integrates them in a unique stage. 
Some significant improvements are obtained for memory gain, communication load and physical links.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "64", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhong:2012:SNL, author = "Ziguo Zhong and Tian He", title = "Sensor Node Localization with Uncontrolled Events", journal = j-TECS, volume = "11", number = "3", pages = "65:1--65:??", month = sep, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2345770.2345777", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 22 10:44:19 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Event-driven localization has been proposed as a low-cost solution for node positioning in wireless sensor networks. In order to eliminate the costly requirement for accurate event control in existing methods, we present a practical design using uncontrolled events. The main idea is to estimate both event generation parameters and the location of sensor nodes simultaneously, by processing node sequences that can be easily obtained from event detections. Besides the basic design, we proposed two enhancements to further extract information embedded in node orderings for two scenarios: (i) node density is high; and (ii) abundant events are available. To demonstrate the generality of our design, both straight-line scan and circular wave propagation events are addressed in the article, and we evaluated the design with extensive simulation as well as a testbed implementation with 41 MICAz motes. Results show that with only randomly generated events, our design can effectively localize nodes with great flexibility while adding little extra cost at the resource constrained sensor node side. 
In addition, localization via uncontrolled events provides a potential option of achieving node positioning through long-term ambient events.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "65", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kumar:2012:ECI, author = "Karthik Kumar and Yamini Nimmagadda and Yung-Hsiang Lu", title = "Energy Conservation for Image Retrieval on Mobile Systems", journal = j-TECS, volume = "11", number = "3", pages = "66:1--66:??", month = sep, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2345770.2345779", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 22 10:44:19 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Mobile systems such as PDAs and cell phones play an increasing role in handling visual contents such as images. Thousands of images can be stored in a mobile system with the advances in storage technology: this creates the need for better organization and retrieval of these images. Content Based Image Retrieval (CBIR) is a method to retrieve images based on their visual contents. In CBIR, images are compared by matching their numerical representations called features; CBIR is computation and memory intensive and consumes significant amounts of energy. This article examines energy conservation for CBIR on mobile systems. We present three improvements to save energy while performing the computation on the mobile system: selective loading, adaptive loading, and caching features in memory. Using these improvements adaptively reduces the features to be loaded into memory for each search. The reduction is achieved by estimating the difficulty of the search. If the images in the collection are dissimilar, fewer features are sufficient; less computation is performed and energy can be saved. 
We also consider the effect of consecutive user queries and show how features can be cached in memory to save energy. We implement a CBIR algorithm on an HP iPAQ hw6945 and show that these improvements can save energy and allow CBIR to scale up to 50,000 images on a mobile system. We further investigate if energy can be saved by migrating parts of the computation to a server, called computation offloading. We analyze the impact of the wireless bandwidth, server speed, number of indexed images, and the number of image queries on the energy consumption. Using our scheme, CBIR can be made energy efficient under all conditions.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "66", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lee:2012:IMR, author = "Jaehwan John Lee and Xiang Xiao", title = "Instant Multiunit Resource Hardware Deadlock Detection Scheme for System-on-Chips", journal = j-TECS, volume = "11", number = "3", pages = "67:1--67:??", month = sep, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2345770.2345780", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 22 10:44:19 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this article, a brand new method of determining deadlock is presented. Most previous deadlock detection methods are algorithmic in the sense that they usually leverage some forms of Resource Allocation Graph (RAG) representations and then algorithms are devised to manipulate such representations in order to detect deadlock using information contained in the graph. Different from all previous methods, the proposed method actualizes the RAG with a digital circuit and uses it as a token-transmitting network. 
By supplying special input signals (tokens) to the network and observing the output tokens from the network, it is easier to identify which process nodes are reachable from each resource node in the graph. Using the reachability information, deadlock can be detected immediately. The time required to obtain the reachability information is determined by how fast the combinational circuit operates. Compared with previous algorithmic methods, the proposed deadlock detection can be deemed instant. We show that the proposed method is an order of magnitude faster than the previous fastest hardware mechanism and several orders of magnitude faster than traditional software-based algorithms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "67", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zappi:2012:NLP, author = "Piero Zappi and Daniel Roggen and Elisabetta Farella and Gerhard Tr{\"o}ster and Luca Benini", title = "Network-Level Power-Performance Trade-Off in Wearable Activity Recognition: a Dynamic Sensor Selection Approach", journal = j-TECS, volume = "11", number = "3", pages = "68:1--68:??", month = sep, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2345770.2345781", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 22 10:44:19 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Wearable gesture recognition enables context aware applications and unobtrusive HCI. It is realized by applying machine learning techniques to data from on-body sensor nodes. We present a gesture recognition system minimizing power while maintaining a run-time application defined performance target through dynamic sensor selection. 
Compared to the non managed approach optimized for recognition accuracy (95\% accuracy), our technique can extend network lifetime by 4 times with accuracy {$>$90}\% and by 9 times with accuracy {$>$70}\%. We characterize the approach and outline its applicability to other scenarios.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "68", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ambrose:2012:RII, author = "Jude A. Ambrose and Roshan G. Ragel and Sri Parameswaran", title = "Randomized Instruction Injection to Counter Power Analysis Attacks", journal = j-TECS, volume = "11", number = "3", pages = "69:1--69:??", month = sep, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2345770.2345782", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 22 10:44:19 MDT 2012", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Side-channel attacks in general and power analysis attacks in particular are becoming a major security concern in embedded systems. Countermeasures proposed against power analysis attacks are data and table masking, current flattening, dummy instruction insertion and bit-flips balancing. All these techniques are either susceptible to multi-order power analysis attack, not sufficiently generic to cover all encryption algorithms, or burden the system with high area, run-time or energy cost. In this article, we propose a randomized instruction injection technique (RIJID) that overcomes the pitfalls of previous countermeasures. RIJID scrambles the power profile of a cryptographic application by injecting random instructions at random points of execution and therefore protects the system against power analysis attacks. 
Two different ways of triggering the instruction injection are also presented: (1) softRIJID, a hardware/software approach, where special instructions are used in the code for triggering the injection at runtime; and (2) autoRIJID, a hardware approach, where the code injection is triggered by the processor itself via detecting signatures of encryption routines at runtime. A novel signature detection technique is also introduced for identifying encryption routines within application programs at runtime. Further, a simple obfuscation metric (RIJIDindex) based on cross-correlation that measures the scrambling provided by any code injection technique is introduced, which coarsely indicates the level of scrambling achieved. Our processor models cost 1.9\% additional area in the hardware/software approach and 1.2\% in the hardware approach for a RISC based processor, and cost on average 29.8\% in runtime and 27.1\% in energy for the former and 25.0\% in runtime and 28.5\% in energy for the latter, for industry standard cryptographic applications.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "69", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Pimentel:2012:ISS, author = "Andy D. Pimentel and Naehyuck Chang and Mladen Berekovic", title = "Introduction to special section {ESTIMedia'09}", journal = j-TECS, volume = "11", number = "4", pages = "70:1--70:??", month = dec, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2362336.2362337", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 10 17:38:16 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "70", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Paterna:2012:VTW, author = "Francesco Paterna and Andrea Acquaviva and Francesco Papariello and Giuseppe Desoli and Luca Benini", title = "Variability-tolerant workload allocation for {MPSoC} energy minimization under real-time constraints", journal = j-TECS, volume = "11", number = "4", pages = "71:1--71:??", month = dec, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2362336.2362338", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 10 17:38:16 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Sub-50nm CMOS technologies are affected by significant variability, which causes power and performance variations among nominally similar cores in MPSoC platforms. This undesired heterogeneity threatens execution predictability and energy efficiency. We propose two techniques to allocate sets of barrier-synchronized tasks. The first technique models allocation as an ILP and achieves optimal results, but requires an offline solver. The second technique adopts a two-stage heuristic approach, and it can be adapted to work online. We tested our approach on the virtual prototype of a next-generation industrial multicore platform. Experimental results demonstrate that our approach minimizes deadline violations while increasing energy efficiency.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "71", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Tsutsui:2012:HTP, author = "Hiroshi Tsutsui and Koichi Hattori and Hiroyuki Ochi and Yukihiro Nakamura", title = "A high-throughput pipelined parallel architecture for {JPEG XR} encoding", journal = j-TECS, volume = "11", number = "4", pages = "72:1--72:??", month = dec, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2362336.2362339", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 10 17:38:16 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "JPEG XR is an emerging image coding standard, based on HD Photo developed by Microsoft Corporation. It supports high compression performance twice as high as the de facto image coding system, namely, JPEG, and also has an advantage over JPEG 2000 in terms of computational cost. JPEG XR is expected to be widespread for many devices including embedded systems in the near future. In this article, we propose a novel architecture for JPEG XR encoding. In previous architectures, entropy coding was the throughput bottleneck because it was implemented as a sequential algorithm to handle data with dependency. We found that there is no dependency in intra-macroblock data, and we could safely pipeline all the encoding processes including the entropy coding. In addition, each module of our architecture, which can be regarded as a pipeline stage, can be parallelized. As a result, our architecture can achieve 12.8 pixel/cycle at its maximum. To demonstrate our architecture, we designed three versions of our architecture with different degrees of parallelism of one, two, and four. Our four-way parallel architecture achieves 579 Mpixel/sec at 181MHz clock frequency for grayscale images.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "72", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kim:2012:XFM, author = "Minyoung Kim and Mark-Oliver Stehr and Carolyn Talcott and Nikil Dutt and Nalini Venkatasubramanian", title = "{xTune}: a formal methodology for cross-layer tuning of mobile embedded systems", journal = j-TECS, volume = "11", number = "4", pages = "73:1--73:??", month = dec, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2362336.2362340", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 10 17:38:16 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Resource-limited mobile embedded systems can benefit greatly from dynamic adaptation of system parameters. We propose a novel approach that employs iterative tuning using lightweight formal verification at runtime with feedback for dynamic adaptation. One objective of this approach is to enable trade-off analysis across multiple layers (e.g., application, middleware, OS) and predict the possible property violations as the system evolves dynamically over time. Specifically, an executable formal specification is developed for each layer of the mobile system under consideration. The formal specification is then analyzed using statistical property checking and statistical quantitative analysis, to determine the impact of various resource management policies for achieving desired timing/QoS properties. Integration of formal analysis with dynamic behavior from system execution results in a feedback loop that enables model refinement and further optimization of policies and parameters. We demonstrate the applicability of this approach to the adaptive provisioning of resource-limited distributed real-time systems using a mobile multimedia case study.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "73", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Dick:2012:ISS, author = "Robert Dick and Li Shang and Nikil Dutt", title = "Introduction to special section {SCPS'09}", journal = j-TECS, volume = "11", number = "4", pages = "74:1--74:??", month = dec, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2362336.2362341", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 10 17:38:16 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "74", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Koutsoukos:2012:PAM, author = "Xenofon Koutsoukos and Nicholas Kottenstette and Joseph Hall and Emeka Eyisi and Heath Leblanc and Joseph Porter and Janos Sztipanovits", title = "A passivity approach for model-based compositional design of networked control systems", journal = j-TECS, volume = "11", number = "4", pages = "75:1--75:??", month = dec, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2362336.2362342", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 10 17:38:16 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The integration of physical systems through computing and networking has become pervasive, a trend now known as cyber-physical systems (CPS). Functionality in CPS emerges from the interaction of networked computational and physical objects. System design and integration are particularly challenging because fundamentally different physical and computational design concerns intersect. The impact of these interactions is the loss of compositionality which creates tremendous challenges. 
The key idea in this article is to use passivity for decoupling the control design of networked systems from uncertainties such as time delays and packet loss, thus providing a fundamental simplification strategy that limits the complexity of interactions. The main contribution is the application of the approach to an experimental case study of a networked multi-robot system. We present a networked control architecture that ensures the overall system remains stable in spite of implementation uncertainties such as network delays and data dropouts, focusing on the technical details required for the implementation. We describe a prototype domain-specific modeling language and automated code generation tools for the design of networked control systems on top of passivity that facilitate effective system configuration, deployment, and testing. Finally, we present experimental evaluation results that show decoupling of interlayer interactions.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "75", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Shin:2012:CTC, author = "Donghwa Shin and Jaehyun Park and Younghyun Kim and Jaeam Seo and Naehyuck Chang", title = "Control-theoretic cyber-physical system modeling and synthesis: a case study of an active direct methanol fuel cell", journal = j-TECS, volume = "11", number = "4", pages = "76:1--76:??", month = dec, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2362336.2362343", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 10 17:38:16 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "A joint optimization of the physical system and the cyber world is one of the key problems in the design of a cyber-physical system (CPS). 
The major mechanical forces and/or chemical reactions in a plant are commonly modified by actuators in the balance-of-plant (BOP) system. More powerful actuators require more power, but generally increase the response of the physical system powered by the electrical energy generated by the physical system. To maximize the overall output of a power generating plant therefore requires joint optimization of the physical system and the cyber world, and this is a key factor in the design of a CPS. We introduce a systematic approach to the modeling and synthesis of a CPS that emphasizes joint power optimization, using an active direct methanol fuel cell (DMFC) as a case study. Active DMFC systems are superior to passive DMFCs in terms of fuel efficiency thanks to their BOP system, which includes pumps, air blowers, and fans. However, designing a small-scale active DMFC with the best overall system efficiency requires the BOP system to be jointly optimized with the DMFC stack operation, because the BOP components are powered by the stack. Our approach to this synthesis problem involves (i) BOP system characterization, (ii) integrated DMFC system modeling, (iii) configuring a system for the maximum net power output through design space exploration, (iv) synthesis of feedback control tasks, and (v) implementation.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "76", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Malik:2012:SLA, author = "Avinash Malik and Zoran Salcic and Christopher Chong and Salman Javed", title = "System-level approach to the design of a smart distributed surveillance system using {SystemJ}", journal = j-TECS, volume = "11", number = "4", pages = "77:1--77:??", month = dec, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2362336.2362344", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 10 17:38:16 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Distributed surveillance systems represent a class of sensor networks used for object location and tracking, road traffic monitoring, security, and other purposes. They are very complex to describe, design, and run. Because of their sensitivity, they need to be carefully designed and validated. We present a system-level approach to modeling and designing such systems using a new system-level programming language, SystemJ, which enables designers to describe computational and communication parts of such applications in a highly abstract manner. The designed system can be modeled and validated even before deployment and in that way contribute to the overall reliability and trustworthiness of such systems. As an additional tool, the design environment for specification of the surveillance system topology, physical and communication properties, selected sensors and their interconnectivity with the computing resources was developed. This tool enables easy composition of multiple sensors and their respective controllers, capturing changes of configuration of the system and underlying communication, and automatic generation of the formal description of the surveillance system. 
This description is then used for the generation of executable code and/or the templates for detailed SystemJ application-specific code, as well as for generation of the operator GUI in a surveillance system.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "77", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Yoong:2012:ICC, author = "Li Hsien Yoong and Partha S. Roop and Zoran Salcic", title = "Implementing constrained cyber-physical systems with {IEC 61499}", journal = j-TECS, volume = "11", number = "4", pages = "78:1--78:??", month = dec, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2362336.2362345", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 10 17:38:16 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Cyber-physical systems (CPS) are integrations of computation and control with sensing and actuation of the physical environment. Typically, such systems consist of embedded computers that monitor and control physical processes in a feedback loop. While modern electronic systems are increasingly characterized as CPS, their design and synthesis still rely on traditional methods, which lack systematic and automated techniques for accomplishment. Recently, IEC 61499 has been proposed as a standard for designing industrial process-control and measurement systems. It prescribes a component-based approach for developing industrial automation software using function blocks. Executable code can then be automatically generated and simulated from these function blocks. This bodes well for designers of CPS, who are more likely to be experts in specific industrial domains, rather than in computer science. 
The intuitive graphical nature and automatic code synthesis of IEC 61499 programs will alleviate the programming burden of industrial engineers, while ensuring more reliable software. While software synthesis from IEC 61499 programs is not new, the generation of efficient code from them has been wanting. This has made it difficult for function blocks to be used in software development for resource-constrained embedded controllers commonly employed in CPS. To address this, we present an approach that can generate very efficient code from function block descriptions. Experimental results from a benchmark suite show that our approach produces substantially faster and smaller code compared to existing techniques.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "78", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Subramanian:2012:GOP, author = "Varun Subramanian and Michael Gilberti and Alex Doboli and Dan Pescaru", title = "A goal-oriented programming framework for grid sensor networks with reconfigurable embedded nodes", journal = j-TECS, volume = "11", number = "4", pages = "79:1--79:??", month = dec, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2362336.2362346", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 10 17:38:16 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Cyber-physical systems (CPS) are large, distributed embedded systems integrated with various sensors and actuators. CPS are rapidly emerging as an important computing paradigm in many modern applications. Developing CPS applications is currently challenging due to the sheer complexity of the related functionality as well as the broad set of constraints and unknowns that must be tackled during operation.
This article presents a novel high-level programming model and the supporting optimization and middleware routines for executing applications on physically-distributed networks of reconfigurable embedded systems. The proposed model describes the optimization goals, sensing inputs, actuation outputs, events, and constraints of an application, while leaving to the compiler and execution environment the task of optimally implementing the derived functionality. Experimental results discuss the additional performance optimizations enabled by the proposed model, and the timing and power consumption of the middleware routines, and present a temperature monitoring application implemented on a network of reconfigurable, embedded processors.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "79", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Tan:2012:ACF, author = "Rui Tan and Guoliang Xing and Xue Liu and Jianguo Yao and Zhaohui Yuan", title = "Adaptive calibration for fusion-based cyber-physical systems", journal = j-TECS, volume = "11", number = "4", pages = "80:1--80:??", month = dec, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2362336.2362347", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 10 17:38:16 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Many Cyber-Physical Systems (CPS) are composed of low-cost devices that are deeply integrated with physical environments. As a result, the performance of a CPS system is inevitably undermined by various physical uncertainties, which include stochastic noises, hardware biases, unpredictable environment changes, and dynamics of the physical process of interest. 
Traditional solutions to these issues (e.g., device calibration and collaborative signal processing) work in an open-loop fashion and hence often fail to adapt to the uncertainties after system deployment. In this article, we propose an adaptive system-level calibration approach for a class of CPS systems whose primary objective is to detect events or targets of interest. Through collaborative data fusion, our calibration approach features a feedback control loop that exploits system heterogeneity to mitigate the impact of aforementioned uncertainties on the system performance. In contrast to existing heuristic-based solutions, our control-theoretical calibration algorithm can ensure provable system stability and convergence. We also develop a routing algorithm for fusion-based multihop CPS systems that is robust to communication unreliability and delay. Our approach is evaluated by both experiments on a testbed of Tmotes as well as extensive simulations based on data traces gathered from a real vehicle detection experiment. The results demonstrate that our calibration algorithm enables a CPS system to maintain the optimal sensing performance in the presence of various system and environmental dynamics.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "80", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Nam:2012:MTI, author = "Min-Young Nam and Kyungtae Kang and Rodolfo Pellizzoni and Kyung-Joon Park and Jung-Eun Kim and Lui Sha", title = "Modeling towards incremental early analyzability of networked avionics systems using virtual integration", journal = j-TECS, volume = "11", number = "4", pages = "81:1--81:??", month = dec, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2362336.2362348", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 10 17:38:16 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "With the advance of hardware technology, more features are incrementally added to already existing networked systems. Avionics has a stronger tendency to use preexisting applications due to its complexity and scale. As resource sharing becomes intense among the network and the computing modules, it has become a difficult task for the system designer to make confident architectural decisions even for incremental changes. Providing a tailored environment to model and analyze incremental changes requires a combination of software tools and hardware support. We have built a virtual integration tool called ASIIST which can provide a worst-case end-to-end latency of data that is sent through a network and the internal bus architecture of the end-systems. Also, we have devised a new real-time switching algorithm which guarantees the worst-case network delay of preexisting network traffic under feasible conditions. With the real-time switch support, ASIIST can provide an early modularized analysis of the end-to-end latency to make architectural design choices and incremental changes easier for the user.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "81", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Pajic:2012:RAE, author = "Miroslav Pajic and Alexander Chernoguzov and Rahul Mangharam", title = "Robust architectures for embedded wireless network control and actuation", journal = j-TECS, volume = "11", number = "4", pages = "82:1--82:??", month = dec, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2362336.2362349", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 10 17:38:16 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Networked cyber-physical systems are fundamentally constrained by the tight coupling and closed-loop control of physical processes. To address actuation in such closed-loop wireless control systems there is a strong need to rethink the communication architectures and protocols for reliability, coordination, and control. We introduce the Embedded Virtual Machine (EVM), a programming abstraction where controller tasks with their control and timing properties are maintained across physical node boundaries and functionality is capable of migrating to the most competent set of physical controllers. In the context of process and discrete control, an EVM is the distributed runtime system that dynamically selects primary-backup sets of controllers given spatial and temporal constraints of the underlying wireless network. EVM-based algorithms allow network control algorithms to operate seamlessly over less reliable wireless networks with topological changes. They introduce new capabilities such as predictable outcomes during sensor/actuator failure, adaptation to mode changes, and runtime optimization of resource consumption. An automated design flow from Simulink to platform-independent domain-specific languages, and subsequently, to platform-dependent code generation is presented. 
Through case studies in discrete and process control we demonstrate the capabilities of EVM-based wireless network control systems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "82", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lakshmanan:2012:OPM, author = "Karthik Lakshmanan and Dionisio {De Niz} and Ragunathan (Raj) Rajkumar and Gabriel Moreno", title = "Overload provisioning in mixed-criticality cyber-physical systems", journal = j-TECS, volume = "11", number = "4", pages = "83:1--83:??", month = dec, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2362336.2362350", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 10 17:38:16 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Cyber-physical systems are an emerging class of applications that require tightly coupled interaction between the computational and physical worlds. These systems are typically realized using sensor/actuator interfaces connected with processing backbones. Safety is a primary concern in cyber-physical systems since the actuators directly influence the physical world. However, unexpected or unusual conditions in the physical world can manifest themselves as increased workload demands being offered to the computational infrastructure of a cyber-physical system. Guaranteeing system safety under overload conditions is therefore a prime concern in developing and deploying cyber-physical systems. In this work, we study this problem in the context of a radar surveillance system, where tasks have different levels of criticality or influence on system safety. In the face of overloads, we observe that the desirable property in such systems is that the more critical tasks continue to meet their timing requirements. 
We capture this mixed-criticality overload requirement using a formal overload-tolerance metric called ductility. Using this overload-tolerance metric, we first develop our solution in the context of uniprocessor systems, where we show that Zero-Slack scheduling (ZS) algorithms can be used to improve the overload behavior in mixed-criticality cyber-physical systems compared to existing fixed-priority scheduling algorithms like Rate-Monotonic Scheduling (RMS) and Criticality-As-Priority-Assignment (CAPA). Leveraging these results, we then develop a criticality-aware task allocation algorithm called Compress-on-Overload Packing (COP) for dealing with multiprocessor cyber-physical systems. Evaluation results show that COP achieves up to five times better ductility than traditional load balancing bin-packing algorithms like Worst-Fit Decreasing (WFD). Finally, we apply ZS and COP to the radar surveillance system to demonstrate the resulting improvement in system overload behavior. Our implementation of the Zero-Slack scheduler is available as a part of the Linux/RK project, which provides resource kernel extensions for Linux.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "83", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Woehrle:2012:CTC, author = "Matthias Woehrle and Kai Lampka and Lothar Thiele", title = "Conformance testing for cyber-physical systems", journal = j-TECS, volume = "11", number = "4", pages = "84:1--84:??", month = dec, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2362336.2362351", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 10 17:38:16 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Cyber-Physical Systems (CPS) require a high degree of reliability and robustness. 
Hence it is important to assert their correctness with respect to extra-functional properties, like power consumption, temperature, etc. In turn the physical quantities may be exploited for assessing system implementations. This article develops a methodology for utilizing measurements of physical quantities for testing the conformance of a running CPS with respect to a formal description of its required behavior allowing to uncover defects. We present foundations and implementations of this approach and demonstrate its usefulness by conformance testing power measurements of a wireless sensor node with a formal model of its power consumption.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "84", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhu:2012:OTA, author = "Qi Zhu and Haibo Zeng and Wei Zheng and Marco {Di Natale} and Alberto Sangiovanni-Vincentelli", title = "Optimization of task allocation and priority assignment in hard real-time distributed systems", journal = j-TECS, volume = "11", number = "4", pages = "85:1--85:??", month = dec, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2362336.2362352", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 10 17:38:16 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The complexity and physical distribution of modern active safety, chassis, and powertrain automotive applications requires the use of distributed architectures. Complex functions designed as networks of function blocks exchanging signal information are deployed onto the physical HW and implemented in a SW architecture consisting of a set of tasks and messages. The typical configuration features priority-based scheduling of tasks and messages and imposes end-to-end deadlines. 
In this work, we present and compare formulations and procedures for the optimization of the task allocation, the signal to message mapping, and the assignment of priorities to tasks and messages in order to meet end-to-end deadline constraints and minimize latencies. Our formulations leverage worst-case response time analysis within a mixed integer linear optimization framework and are compared for performance against a simulated annealing implementation. The methods are applied for evaluation to an automotive case study of complexity comparable to industrial design problems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "85", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Cucinotta:2012:ART, author = "Tommaso Cucinotta and Fabio Checconi and Luca Abeni and Luigi Palopoli", title = "Adaptive real-time scheduling for legacy multimedia applications", journal = j-TECS, volume = "11", number = "4", pages = "86:1--86:??", month = dec, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2362336.2362353", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 10 17:38:16 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Multimedia applications are often executed on standard personal computers. The absence of established standards has hindered the adoption of real-time scheduling solutions in this class of applications. Developers have adopted a wide range of heuristic approaches to achieve an acceptable timing behavior but the result is often unreliable. 
We propose a mechanism to extend the benefits of real-time scheduling to legacy applications based on the combination of two techniques: (1) a real-time monitor that observes and infers the activation period of the application, and (2) a feedback mechanism that adapts the scheduling parameters to improve its real-time performance.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "86", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Scharfenberger:2012:RIP, author = "Christian Scharfenberger and Samarjit Chakraborty and Georg F{\"a}rber", title = "Robust image processing for an omnidirectional camera-based smart car door", journal = j-TECS, volume = "11", number = "4", pages = "87:1--87:??", month = dec, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2362336.2362354", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 10 17:38:16 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Over the last decade, there has been an increasing emphasis on driver-assistance systems for the automotive domain. In this article, we report our work on designing a camera-based surveillance system embedded in a ``smart'' car door. Such a camera is used to monitor the ambient environment outside the car, for instance, the presence of obstacles such as approaching cars or cyclists who might collide with the car door if opened---and automatically control the car door operations. This is an enhancement to the currently available side-view mirrors that the driver/passenger checks before opening the car door. The focus of this article is on fast and robust image processing algorithms specifically targeting such a smart car door system. The requirement is to quickly detect traffic objects of interest from grayscale images captured by omnidirectional cameras.
While known algorithms for object extraction from the image processing literature rely on color information and are sensitive to shadows and illumination changes, our proposed algorithms are highly robust, can operate on grayscale images (color images are not available in our setup), and output results in real time. We present a number of experimental results based on image sequences captured from real-life traffic scenarios to demonstrate the applicability of our algorithm.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "87", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Gordon-Ross:2012:CCR, author = "Ann Gordon-Ross and Frank Vahid and Nikil Dutt", title = "Combining code reordering and cache configuration", journal = j-TECS, volume = "11", number = "4", pages = "88:1--88:??", month = dec, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2362336.2399177", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 10 17:38:16 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The instruction cache is a popular optimization target due to the cache's high impact on system performance and power and because of the cache's predictable temporal and spatial locality. This article is an in depth study on the interaction of code reordering (a long-known technique) and cache configuration (a relatively new technique). Experimental results show that code reordering coupled with cache configuration reveals additional energy savings as high as 10--15\% for several benchmarks with reduced cache area as high as 48\%. To exploit these additional benefits, we architect and evaluate several design exploration heuristics for combining these two methods.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "88", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Baiocchi:2012:EDB, author = "Jos{\'e} A. Baiocchi and Bruce R. Childers and Jack W. Davidson and Jason D. Hiser", title = "Enabling dynamic binary translation in embedded systems with scratchpad memory", journal = j-TECS, volume = "11", number = "4", pages = "89:1--89:??", month = dec, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2362336.2399178", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 10 17:38:16 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Important challenges for embedded systems can be addressed by dynamic binary translation. A dynamic binary translator stores translated instructions in a software-managed code cache, which is usually large to minimize overhead. This article shows how to use a small scratchpad memory for the code cache. A small code cache may require frequent code evictions and retranslation, which degrade performance. We propose techniques to reduce the number of instructions inserted by the translator and a way to form fragments that minimizes translated code size. With our techniques, a much smaller code cache can hold a program's translated code working set.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "89", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Khalgui:2013:ISI, author = "Mohamed Khalgui and Zhiwu Li", title = "Introduction to the {Special Issue on Modeling and Verification of Discrete Event Systems}", journal = j-TECS, volume = "12", number = "1", pages = "1:1--1:??", month = jan, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2406336.2406337", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Jan 25 17:38:43 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "1", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wang:2013:DLE, author = "Shouguang Wang and Chengying Wang and Yanping Yu", title = "Design of Liveness-Enforcing Supervisors for {S3PR} Based on Complementary Places", journal = j-TECS, volume = "12", number = "1", pages = "2:1--2:??", month = jan, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2406336.2406338", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Jan 25 17:38:43 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this article, an algorithm is proposed to design liveness-enforcing supervisors for systems of simple sequential processes with resources (S$^3$PR) based on complementary places. Firstly, a mixed integer programming (MIP) based deadlock detection method is used to find unmarked strict minimal siphons from an infinite-capacity net. Next, the finite-capacity net, in which liveness can be enforced, is obtained by adding capacity function to the infinite-capacity net. 
Finally, complementary-place transformation is used to transform the finite-capacity net into an infinite-capacity net. This article focuses on adding a complementary place to each operation place that is related to unmarked siphons, deals with the deadlock problem from a new view point, and hence advances the deadlock control theory. Compared with the existing methods, the new policy is easier to implement for real industrial systems. More importantly, design of a complementary-place supervisor is very easy. Finally, in some cases, the new policy can obtain a structurally simpler supervisor with more permissive behavior than the existing methods do. A flexible manufacturing systems (FMS) example is used to compare the proposed policy with some other methods.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "2", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chen:2013:CMS, author = "Yufeng Chen and Gaiyun Liu", title = "Computation of Minimal Siphons in {Petri} Nets by Using Binary Decision Diagrams", journal = j-TECS, volume = "12", number = "1", pages = "3:1--3:??", month = jan, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2406336.2406339", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Jan 25 17:38:43 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Siphons play an important role in the development of deadlock control methods by using Petri nets. The number of siphons increases exponentially with respect to the size of a Petri net. This article presents a symbolic approach to the computation of minimal siphons in Petri nets by using binary decision diagrams (BDD). The siphons of a Petri net can be found via a set of logic conditions. The logic conditions are symbolically modeled by using Boolean algebras. 
The operations of Boolean algebras are implemented by BDD that are capable of representing large sets of siphons with small shared data structures. The proposed method first uses BDD to compute all siphons of a Petri net and then a binary relation is designed to extract all minimal siphons. Finally, by using a number of examples, the efficiency of the proposed method is verified through different-sized problems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "3", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ding:2013:DAV, author = "Zhijun Ding and Changjun Jiang and Mengchu Zhou", title = "Design, Analysis and Verification of Real-Time Systems Based on Time {Petri} Net Refinement", journal = j-TECS, volume = "12", number = "1", pages = "4:1--4:??", month = jan, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2406336.2406340", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Jan 25 17:38:43 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "A type of refinement operations of time Petri nets is presented for design, analysis and verification of complex real-time systems. First, the behavior preservation is studied under time constraints in a refinement operation, and a sufficient condition for behavior preservation is obtained. Then, the property preservation is considered, and the results indicate that if the refinement operation of time Petri nets satisfies behavior preservation, it can also preserve properties such as boundedness and liveness. Finally, based on the behavior preservation, a reachability decidability algorithm of a refined time Petri net is designed using the reachability trees of its original net and subnet. 
The research results are illustrated by an example of designing, analyzing and verifying a real-time manufacturing system.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "4", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{McInnes:2013:MAT, author = "Allan I. McInnes", title = "Modeling and Analysis of {TinyOS} Sensor Node Firmware: a {CSP} Approach", journal = j-TECS, volume = "12", number = "1", pages = "5:1--5:??", month = jan, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2406336.2406341", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Jan 25 17:38:43 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Wireless sensor networks are an increasingly popular application area for embedded systems. Individual sensor nodes within a network are typically resource-constrained, event-driven, and require a high degree of concurrency. This combination of requirements motivated the development of the widely used TinyOS sensor node operating system. The TinyOS concurrency model is a lightweight nonpreemptive system designed to suit the needs of typical sensor network applications. Although the TinyOS concurrency model is easier to reason about than preemptive threads, it can still give rise to undesirable behavior due to unexpected interleavings of related tasks, or unanticipated preemption by interrupt handlers. To aid TinyOS developers in understanding the behavior of their programs we have developed a technique for using the process algebra Communicating Sequential Processes (CSP) to model the interactions between TinyOS components, and between an application and the TinyOS scheduling and preemption mechanisms. 
Analysis of the resulting models can help TinyOS developers to discover and diagnose concurrency-related errors in their designs that might otherwise go undetected until after the application has been widely deployed. Such analysis is particularly valuable for the TinyOS components that are used as building blocks for a large number of other applications, since a subtle or sporadic error in a widely deployed building block component could be extremely costly to repair.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "5", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Godary-Dejean:2013:FVD, author = "Karen Godary-Dejean and David Andreu", title = "Formal Validation of a Deterministic {MAC} Protocol", journal = j-TECS, volume = "12", number = "1", pages = "6:1--6:??", month = jan, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2406336.2406342", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Jan 25 17:38:43 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article deals with the formal validation of STIMAP, a medium access protocol that has been designed to meet the specific requirements of an implantable network-based neuroprosthesis. This article presents the modeling and the validation of its medium access, using model checking on Time Petri Nets. Doing so, we show that existent formal methods and tools are not perfectly suitable for the validation of real systems, especially when some hardware parameters have to be considered. This article then presents how these difficulties have been managed during the modeling and verification phases, and gives the validation results for STIMAP, providing constraints to respect.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "6", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Boucheneb:2013:RIS, author = "Hanifa Boucheneb and Kamel Barkaoui", title = "Reducing Interleaving Semantics Redundancy in Reachability Analysis of Time {Petri} Nets", journal = j-TECS, volume = "12", number = "1", pages = "7:1--7:??", month = jan, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2406336.2406343", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Jan 25 17:38:43 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The main problem of verification techniques based on exploration of (reachable) state space is the state explosion problem. In timed models, abstract states reached by different interleavings of the same set of transitions are, in general, different and their union is not necessarily an abstract state. To attenuate this state explosion, it would be interesting to reduce the redundancy caused by the interleaving semantics by agglomerating all these abstract states whenever their union is an abstract state. This article considers the time Petri net model and establishes some sufficient conditions that ensure that this union is an abstract state. In addition, it proposes a procedure to compute this union without computing beforehand intermediate abstract states. Finally, it shows how to use this result to improve the reachability analysis.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "7", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhang:2013:SCE, author = "Zhiming Zhang and Weimin Wu", title = "Sequence Control of Essential Siphons for Deadlock Prevention in {Petri} Nets", journal = j-TECS, volume = "12", number = "1", pages = "8:1--8:??", month = jan, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2406336.2406344", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Jan 25 17:38:43 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Deadlock prevention is crucial to the modeling of flexible manufacturing systems. In the Petri net framework, deadlock prevention is often addressed by siphon-based control (SC) policies. Recent research results show that SC methods can avoid full siphon enumeration by using mixed integer programming (MIP) to greatly increase the computational efficiency so that it can be applied in large systems in computable time. Besides, maximally permissive control solutions can be obtained by means of iterative siphon control (ISC) approaches and MIP. Then the remaining problems are redundancy and MIP iterations. Redundant controllers make the closed-loop system more complicated and each MIP iteration increases the total computational time. This article proposes a revised ISC deadlock prevention policy which can achieve better results than the other reported methods in terms of redundancy and MIP iterations while maintaining the maximal permissiveness. Several benchmark examples are provided to illustrate the proposed approach and to be compared with the other reported methods.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "8", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ahmed:2013:HGA, author = "Zakir Hussain Ahmed", title = "A Hybrid Genetic Algorithm for the {Bottleneck Traveling Salesman Problem}", journal = j-TECS, volume = "12", number = "1", pages = "9:1--9:??", month = jan, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2406336.2406345", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Jan 25 17:38:43 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The bottleneck traveling salesman problem is to find a Hamiltonian circuit that minimizes the largest cost of any of its arcs in a graph. A simple genetic algorithm (GA) using sequential constructive crossover has been developed to obtain heuristic solution to the problem. The hybrid GA incorporates 2-opt search, another proposed local search and immigration to the simple GA for obtaining better solution. The efficiency of our hybrid GA to the problem against two existing heuristic algorithms has been examined for some symmetric TSPLIB instances. The comparative study shows the effectiveness of our hybrid algorithm. Finally, we present solutions to the problem for asymmetric TSPLIB instances.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "9", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wu:2013:OSL, author = "Naiqi Wu and Mengchu Zhou and Gang Hu", title = "One-Step Look-Ahead Maximally Permissive Deadlock Control of {AMS} by Using {Petri} Nets", journal = j-TECS, volume = "12", number = "1", pages = "10:1--10:??", month = jan, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2406336.2406346", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Jan 25 17:38:43 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "It is desired that a deadlock control policy for automated manufacturing systems (AMS) is maximally permissive. However, its tractability issue remains open, and this work addresses this important issue. It models AMS with a resource-oriented Petri net (ROPN) and presents a necessary and sufficient condition under which there exists a one-step look-ahead maximally permissive control policy for deadlock avoidance in AMS. It further identifies some conditions under which a one-step look-ahead maximally permissive deadlock control policy exists for a single-capacity system. The conditions can be conveniently examined by using the developed ROPN model.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "10", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Huang:2013:TBD, author = "Yi-Sheng Huang and Yen-Liang Pan and Pin-June Su", title = "Transition-Based Deadlock Detection and Recovery Policy for {FMSs} Using Graph Technique", journal = j-TECS, volume = "12", number = "1", pages = "11:1--11:??", month = jan, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2406336.2406347", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Jan 25 17:38:43 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "A transition-controlled deadlock detection and recovery prevention policy is presented for a subclass of Petri nets used to model flexible manufacturing systems. The subclass is called systems of simple sequential processes with resources (S$^3$PR). The proposed policy is different from the standard deadlock prevention policies. Instead of adding control places, this policy adds a controlled transition to solve a group of deadlocked markings that have the same graph-based property. Finally, the results of our study indicate that the proposed policy appears to be more permissive than those existing ones that add control places.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "11", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Nazemzadeh:2013:FMD, author = "Payam Nazemzadeh and Abbas Dideban and Meisam Zareiee", title = "Fault Modeling in Discrete Event Systems Using {Petri} Nets", journal = j-TECS, volume = "12", number = "1", pages = "12:1--12:??", month = jan, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2406336.2406348", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Jan 25 17:38:43 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this article a model-based controller reconfiguration method for fault-tolerant control of discrete event systems has been introduced. In this method, we model the fault conditions for each specified fault as a new model called fault model. The system then consists of three different models called process, specification and fault. The faulty parts of the system are not permitted to do any job and the controller tries to enforce the specifications by other parts of the system. With this method, the controller reconfiguration problem for fault-tolerant control of discrete event systems converts to the problem of synchronizing the process, specification and fault model. We must synthesize a supervisor that can enforce both specifications and faults status. If this supervisor can be determined, we can achieve a fault-tolerant controller. Implementing both specification and fault models in the system, may lead to a large number of forbidden states and constraints and so on a more complicated forbidden states problem must be solved. The application of constraints simplification methods is shown. By the existing methods for offline simplifying of constraints, we can arrive at a simplified fault tolerant controller.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "12", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mhamdi:2013:FMT, author = "Tarek Mhamdi and Osman Hasan and Sofi{\`e}ne Tahar", title = "Formalization of Measure Theory and {Lebesgue} Integration for Probabilistic Analysis in {HOL}", journal = j-TECS, volume = "12", number = "1", pages = "13:1--13:??", month = jan, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2406336.2406349", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Jan 25 17:38:43 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Dynamic systems that exhibit probabilistic behavior represent a large class of man-made systems such as communication networks, air traffic control, and other mission-critical systems. Evaluation of quantitative issues like performance and dependability of these systems is of paramount importance. In this paper, we propose a generalized methodology to formally reason about probabilistic systems within a theorem prover. We present a formalization of measure theory in the HOL theorem prover and use it to formalize basic concepts from the theory of probability. We also use the Lebesgue integration to formalize statistical properties of random variables. To illustrate the practical effectiveness of our methodology, we formally prove classical results from the theories of probability and information and use them in a data compression application in HOL.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "13", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Khalgui:2013:RRE, author = "Mohamed Khalgui and Olfa Mosbahi and Zhiwu Li", title = "Runtime Reconfigurations of Embedded Controllers", journal = j-TECS, volume = "12", number = "1", pages = "14:1--14:??", month = jan, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2406336.2406350", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Jan 25 17:38:43 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The article deals with Reconfigurable Embedded Control Systems following different Component-based Technologies and/or Architecture Description Languages used today in Industry. We define a Control Component as a software unit to support control tasks of the system which is assumed to be a network of components with precedence constraints. We define an agent-based architecture to handle automatic reconfigurations under well-defined conditions by creating, deleting or updating components to bring the whole system into safe and optimal behaviors. To cover all possible reconfiguration forms, we model the agent by nested state machines according to the formalism Net Condition/Event Systems (abbr. NCES) which is an extension of Petri nets. We apply in addition a model checking to verify functional and extra-functional properties according to the temporal logic ``Computation Tree Logic'' (abbr. CTL). The goal is to check the agent's reactivity after any evolution of the environment. Several complex networks can implement the system such that each one is executed at a given time when a corresponding reconfiguration scenario is automatically applied by the agent. 
To check the correctness of each one of them, we apply in several steps a refinement-based approach that automatically specifies feasible Control Components according to NCES. The model checker SESA is automatically applied in each step to verify deadlock properties of new generated components, and is manually used to verify CTL-based properties according to user requirements. Two Industrial Benchmark Production Systems FESTO and EnAS available in our research laboratory are applied to explain the article's contributions.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "14", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mery:2013:FSM, author = "Dominique M{\'e}ry and Neeraj Kumar Singh", title = "Formal Specification of Medical Systems by Proof-Based Refinement", journal = j-TECS, volume = "12", number = "1", pages = "15:1--15:??", month = jan, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2406336.2406351", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Jan 25 17:38:43 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Formal methods have emerged as an alternative approach to ensuring quality and correctness of highly critical systems, overcoming limitations of traditional validation techniques such as simulation and testing. We propose a refinement-based methodology for complex medical systems design, which possesses all the required key features. A refinement-based combined approach of formal verification, model validation using a model-checker and refinement chart is proposed in this methodology for designing a high-confidence medical device. Furthermore, we show the effectiveness of this methodology for the design of a cardiac pacemaker system.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "15", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mosbahi:2013:CFM, author = "Olfa Mosbahi", title = "Combining Formal Methods for the Development of Reactive Systems", journal = j-TECS, volume = "12", number = "1", pages = "16:1--16:??", month = jan, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2406336.2406352", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Jan 25 17:38:43 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article deals with the use of two verification approaches: theorem proving and model checking. We focus on the Event-B method by using its associated theorem proving tool (Click\_n\_Prove), and on the language TLA$^+$ by using its model checker TLC. By considering the limitation of the Event-B method to invariance properties, we propose to apply the language TLA$^+$ to verify liveness properties on a software behavior. We extend first the expressivity and the semantics of a B model (called temporal B model) to deal with the specification of fairness and eventuality properties. Second, we give transformation rules from a temporal B model into a TLA$^+$ module. We present in particular, our prototype system called B2TLA$^+$, that we have developed to support this transformation; then we can verify these properties thanks to the model checker TLC on finite state systems. For the verification of infinite-state systems, we propose the use of the predicate diagrams. We illustrate our approach on a case study of a parcel sorting system.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "16", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Sunder:2013:FVD, author = "Christoph S{\"u}nder and Valeriy Vyatkin and Alois Zoitl", title = "Formal Verification of Downtimeless System Evolution in Embedded Automation Controllers", journal = j-TECS, volume = "12", number = "1", pages = "17:1--17:??", month = jan, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2406336.2406353", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Jan 25 17:38:43 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article presents a new formal approach to validation of on-the-fly modification of control software in automation systems. The concept of downtimeless system evolution (DSE) is introduced. The DSE is essentially based on the use of IEC 61499 system architecture and formal modeling and verification of the hardware and software of an automation device. The validation is performed by means of two complementary techniques: analytic calculations and formal verification by model-checking.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "17", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Khalgui:2013:DRA, author = "Mohamed Khalgui", title = "Distributed Reconfigurations of Autonomous {IEC61499} Systems", journal = j-TECS, volume = "12", number = "1", pages = "18:1--18:??", month = jan, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2406336.2406354", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Jan 25 17:38:43 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The article deals with Distributed Multiagent Reconfigurable Embedded Control Systems following the International Industrial Standard IEC61499 in which a Function Block (Abbreviated by FB) is an event-triggered software component owning data and a control system is a network of distributed blocks. We define a multiagent embedded architecture in which a Reconfiguration Agent is affected to each device of the execution environment to apply local reconfigurations, and a Coordination Agent is proposed for coordination between devices in order to guarantee safe and coherent distributed reconfigurations. A Communication Protocol is proposed to handle such coordination by using well-defined Coordination Matrices. A prototype is developed to simulate the whole architecture when faults occur or system's optimizations are applied. We specify Reconfiguration Agents to be modeled by nested state machines, and the Coordination Agent according to the formalism Net Condition/Event Systems (Abbreviated by NCES) which is an extension of Petri nets. To allow correct and coherent distributed reconfigurations, we check all possible interactions between controllers by verifying that whenever a reconfiguration is applied in a device, the Coordination Agent and other concerned devices react as described in user requirements. 
We propose finally XML-based implementations of both Coordination and Reconfiguration Agents according to the technology IEC61499. The article's contributions are applied to two Benchmark Production Systems available in our research laboratory.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "18", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chen:2013:ISS, author = "Jian-Jia Chen and Maurizio Palesi", title = "Introduction to the special section on {ESTIMedia'12}", journal = j-TECS, volume = "12", number = "1s", pages = "32:1--32:??", month = mar, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2435227.2435228", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 19 07:54:21 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "32", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Nikitakis:2013:NLP, author = "Antonis Nikitakis and Savvas Papaioannou and Ioannis Papaefstathiou", title = "A novel low-power embedded object recognition system working at multi-frames per second", journal = j-TECS, volume = "12", number = "1s", pages = "33:1--33:??", month = mar, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2435227.2435229", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 19 07:54:21 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "One very important challenge in the field of multimedia is the implementation of fast and detailed Object Detection and Recognition systems. 
In particular, in the current state-of-the-art mobile multimedia systems, it is highly desirable to detect and locate certain objects within a video frame in real time. Although a significant number of Object Detection and Recognition schemes have been developed and implemented, triggering very accurate results, the vast majority of them cannot be applied in state-of-the-art mobile multimedia devices; this is mainly due to the fact that they are highly complex schemes that require a significant amount of processing power, while they are also time consuming and very power hungry. In this article, we present a novel FPGA-based embedded implementation of a very efficient object recognition algorithm called Receptive Field Cooccurrence Histograms Algorithm (RFCH). Our main focus was to increase its performance so as to be able to handle the object recognition task of today's highly sophisticated embedded multimedia systems while keeping its energy consumption at very low levels. Our low-power embedded reconfigurable system is at least 15 times faster than the software implementation on a low-voltage high-end CPU, while consuming at least 60 times less energy. Our novel system is also 88 times more energy efficient than the recently introduced low-power multi-core Intel devices which are optimized for embedded systems. This is, to the best of our knowledge, the first system presented that can execute the complete complex object recognition task at a multi frame per second rate while consuming minimal amounts of energy, making it an ideal candidate for future embedded multimedia systems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "33", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhai:2013:MSA, author = "Jiali Teddy Zhai and Hristo Nikolov and Todor Stefanov", title = "Mapping of streaming applications considering alternative application specifications", journal = j-TECS, volume = "12", number = "1s", pages = "34:1--34:??", month = mar, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2435227.2435230", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 19 07:54:21 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Streaming applications often require a parallel Model of Computation (MoC) to specify their application behavior and to facilitate mapping onto Multi-Processor System-on-Chip (MPSoC) platforms. Various performance requirements and resource budgets of embedded systems ask for an efficient design space exploration (DSE) approach to select the best design from a design space consisting of a large number of design choices. However, existing DSE approaches explore the design space that includes only architecture and mapping alternatives for an initial application specification given by the application designer. In this article, we first show that a design often might not be optimal if alternative specifications of a given application are not taken into account. We further argue that the best alternative specification consists of only independent and load-balanced application tasks. Based on the Polyhedral Process Network (PPN) MoC, we present an approach to analyze and transform an initial PPN to an alternative one that contains only independent processes if possible. 
Finally, by prototyping real-life applications on both FPGA-based MPSoCs and desktop multi-core platforms, we demonstrate that mapping the alternative application specification results in a large performance gain compared to those approaches, in which alternative application specifications are not taken into account.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "34", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Geuns:2013:SST, author = "Stefan J. Geuns and Joost P. H. M. Hausmans and Marco J. G. Bekooij", title = "Sequential specification of time-aware stream processing applications", journal = j-TECS, volume = "12", number = "1s", pages = "35:1--35:??", month = mar, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2435227.2435231", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 19 07:54:21 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Automatic parallelization of Nested Loop Programs (NLPs) is an attractive method to create embedded real-time stream processing applications for multi-core systems. However, the description and parallelization of applications with a time dependent functional behavior has not been considered in NLPs. In such a description, semantic information about time dependent behavior must be made available for the compiler, such that an optimized time independent implementation can be generated automatically. This article introduces language constructs with temporal semantics to NLPs. Using these language constructs, time dependent applications can be specified and a corresponding data-driven implementation can be generated for use on a multi-core system. Despite that these time-aware language constructs can be data-dependent, the application remains functionally deterministic. 
Pipelining is exploited to increase the throughput of an application. The media access control (MAC) protocol of an IEEE 802.11p WLAN transceiver is used to illustrate the relevance and applicability of the introduced concepts.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "35", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lee:2013:LAB, author = "Daeyoung Lee and Hyunok Oh", title = "A lifetime aware buffer assignment method for streaming applications on {DRAM\slash PRAM} hybrid memory", journal = j-TECS, volume = "12", number = "1s", pages = "36:1--36:??", month = mar, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2435227.2435232", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 19 07:54:21 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article proposes a lifetime aware buffer assignment method for streaming applications like multimedia specified in a synchronous dataflow (SDF) graph on a DRAM/PRAM hybrid memory in which the endurance of PRAM is limited. We determine whether buffers are assigned to DRAM or PRAM to minimize the writing frequency of PRAM. To solve the problems, we formulate them using Answer Set Programming. Experimental results show that the proposed approach increases the PRAM lifetime by 63\% compared with no optimization, and shows the tradeoff between PRAM and DRAM size to guarantee a lifetime constraint.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "36", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chung:2013:EUE, author = "Yi-Fan Chung and Yin-Tsung Lo and Chung-Ta King", title = "Enhancing user experiences by exploiting energy and launch delay trade-off of mobile multimedia applications", journal = j-TECS, volume = "12", number = "1s", pages = "37:1--37:??", month = mar, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2435227.2435233", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 19 07:54:21 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Launch delay has been an important factor affecting users' experiences in mobile multimedia applications. To launch applications quickly, modern mobile systems such as Android usually keep inactive applications in the background and manage them through an LRU-based activity stack. Whenever the user wants to run and interact with a background application again, that application can be switched back into the foreground quickly from the activity stack without delay in initializing the applications anew. Since background multimedia applications often continuously consume the battery power of the smart phone, the challenge is to effect a balance between application launch delay and battery lifetime. In this article, we propose innovative application management strategies that terminate ``unbeneficial'' background applications to save energy and pre-launch ``beneficial'' applications to improve the application launch delay. The proposed strategies are evaluated through a trace-driven simulation and a real experiment. The results show that the average application launch delay can be reduced by 15\% while the average battery lifetime is increased by 18\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "37", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{DeSutter:2013:ISS, author = "Bjorn {De Sutter} and Jan Vitek", title = "Introduction to the special section on {LCTES'11}", journal = j-TECS, volume = "12", number = "1s", pages = "38:1--38:??", month = mar, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2435227.2435234", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 19 07:54:21 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "38", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Berthier:2013:SPD, author = "Nicolas Berthier and Florence Maraninchi and Laurent Mounier", title = "Synchronous programming of device drivers for global resource control in embedded operating systems", journal = j-TECS, volume = "12", number = "1s", pages = "39:1--39:??", month = mar, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2435227.2435235", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 19 07:54:21 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In embedded systems, controlling a shared resource like a bus, or improving a property like power consumption, may be hard to achieve when programming device drivers individually. In this article, we propose a global resource control approach, based on a centralized view of the devices' states. The solution we propose operates on the hardware/software interface. It involves a simple adaptation of the application level, to communicate with the hardware via a control layer. 
The control layer itself is built from a set of simple automata: the device drivers, whose states correspond to functional or power consumption modes, and a controller to enforce global properties. All these automata are programmed using a synchronous language, and compiled into a single piece of C code. We take as example the node of a sensor network. We explain the approach in details, demonstrate its use and benefits with an event-driven or multithreading operating system, and draw guidelines for its use in other contexts.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "39", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Cullmann:2013:CPA, author = "Christoph Cullmann", title = "Cache persistence analysis: Theory and practice", journal = j-TECS, volume = "12", number = "1s", pages = "40:1--40:??", month = mar, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2435227.2435236", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 19 07:54:21 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "To compute a worst-case execution time (WCET) estimate for a program, the architectural effects of the underlying hardware must be modeled. For modern processors this results in the need for a cache and pipeline analysis. The timing-relevant result of the cache analysis is the categorization of the accesses to cached memory. Categorizations that are obtainable by the well-known must and may cache analysis [Ferdinand 1997] are always-hit, always-miss and not-classified. The cache persistence analysis tries to provide additional information for the not-classified case to limit the number of misses. There exists a cache persistence analysis by Ferdinand and Wilhelm based on abstract interpretation computing these classifications. 
In this article, we present a correctness issue with this analysis. To fix this issue, we propose two new abstract interpretation based persistence analyses and show their safety. One is based on the known may analysis and a second one on the concept of conflict counting. For fully timing compositional architectures [Wilhelm et al. 2009] the persistence information is straightforward to use. We will apply the concepts of persistence analysis for the first time to state-of-the-art architectures that exhibit both timing anomalies and domino effects. Such architectures do not allow the analyzer to quantify the costs of a single cache hit or miss in isolation. To make the usage of the persistence information feasible, we integrate the presented novel persistence analyses together with a novel path analysis approach into the industrially used WCET analyzer aiT.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "40", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Sifakis:2013:ISS, author = "Joseph Sifakis and Lothar Thiele and Reinhard Wilhelm", title = "Introduction to the special section on rigorous embedded systems design", journal = j-TECS, volume = "12", number = "1s", pages = "41:1--41:??", month = mar, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2435227.2435237", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 19 07:54:21 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "41", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Reineke:2013:SCR, author = "Jan Reineke and Daniel Grund", title = "Sensitivity of cache replacement policies", journal = j-TECS, volume = "12", number = "1s", pages = "42:1--42:??", month = mar, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2435227.2435238", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 19 07:54:21 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The sensitivity of a cache replacement policy expresses to what extent the execution history may influence the number of cache hits and misses during program execution. We present an algorithm to compute the sensitivity of a replacement policy. We have implemented this algorithm in a tool called Relacs that can handle a large class of replacement policies including LRU, FIFO, PLRU, and MRU. Sensitivity properties obtained with Relacs demonstrate that the execution history can have a strong impact on the number of cache hits and misses if FIFO, PLRU, or MRU is used. A simple model of execution time is used to evaluate the impact of cache sensitivity on measured execution times. The model shows that measured execution times may strongly underestimate the worst-case execution time for FIFO, PLRU, and MRU.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "42", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Jeong:2013:RRM, author = "Jinkyu Jeong and Hwanju Kim and Jeaho Hwang and Joonwon Lee and Seungryoul Maeng", title = "Rigorous rental memory management for embedded systems", journal = j-TECS, volume = "12", number = "1s", pages = "43:1--43:??", month = mar, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2435227.2435239", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 19 07:54:21 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Memory reservation in embedded systems is a prevalent approach to provide a physically contiguous memory region to its integrated devices, such as a camera device and a video decoder. Inefficiency of the memory reservation becomes a more significant problem in emerging embedded systems, such as smartphones and smart TVs. Many ways of using these systems increase the idle time of their integrated devices, and eventually decrease the utilization of their reserved memory. In this article, we propose a scheme to minimize the memory inefficiency caused by the memory reservation. The memory space reserved for a device can be rented for other purposes when the device is not active. For this scheme to be viable, latencies associated with reallocating the memory space should be minimal. Volatile pages are good candidates for such page reallocation since they can be reclaimed immediately as they are needed by the original device. We also provide two optimization techniques, lazy-migration and adaptive-activation. The former increases the lowered utilization of the rental memory by our volatile page allocations, and the latter saves active pages in the rental memory during the reallocation. We implemented our scheme on a smartphone development board with the Android Linux kernel. 
Our prototype has shown that the time for the return operation is less than 0.77 seconds in the tested cases. We believe that this time is acceptable to end-users in terms of transparency since the time can be hidden in application initialization time. The rental memory also brings throughput increases ranging from 2\% to 200\% based on the available memory and the applications' memory intensiveness.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "43", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Vasilikos:2013:HSA, author = "Vasileios Vasilikos and Georgios Smaragdos and Christos Strydis and Ioannis Sourdis", title = "Heuristic search for adaptive, defect-tolerant multiprocessor arrays", journal = j-TECS, volume = "12", number = "1s", pages = "44:1--44:??", month = mar, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2435227.2435240", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 19 07:54:21 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this article, new heuristic-search methods and algorithms are presented for enabling highly efficient and adaptive, defect-tolerant multiprocessor arrays. We consider systems where a homogeneous multiprocessor array lies on top of reconfigurable interconnects which allow the pipeline stages of the processors to be connected in all possible configurations. Considering the multiprocessor array partitioned in substitutable units at the granularity of pipeline stages, we employ a variety of heuristic-search methods and algorithms to isolate and replace defective units. The proposed heuristics are designed for off-line execution and aim at minimizing the performance overhead necessarily introduced to the array by the interconnects' latency. 
An empirical evaluation of the designed algorithms is then carried out, in order to assess the targeted problem and the efficacy of our approach. Our findings indicate this to be a NP-complete computational problem, however, our heuristic-search methods can achieve, for the problem sizes we exhaustively searched, 100\% accuracy in finding the optimal solution among 10$^{19}$ possible candidates within 2.5 seconds. Alternatively, they can provide near-optimal solutions at an accuracy which consistently exceeds 70\% (compared to the optimal solution) in only 10$^{-4}$ seconds.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "44", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Marinescu:2013:FSJ, author = "Maria-Cristina Marinescu and C{\'e}sar S{\'a}nchez", title = "Fusing statecharts and {Java}", journal = j-TECS, volume = "12", number = "1s", pages = "45:1--45:??", month = mar, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2435227.2435241", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 19 07:54:21 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article presents FUSE, an approach for modeling and implementing embedded software components which starts from a main-stream programming language and brings some of the key concepts of Statecharts as first-class elements within this language. Our approach provides a unified programming environment which not only preserves some of the advantages of Statecharts' formal foundation but also directly supports features of object-orientation and strong typing. 
By specifying Statecharts directly in FUSE we eliminate the out-of-synch between the model and the generated code and we allow the tuning and debugging to be done within the same programming model. This article describes the main language constructs of FUSE and presents its semantics by translation into the Java programming language. We conclude by discussing extensions to the base language which enable the efficient static checking of program properties.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "45", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hubner:2013:ISS, author = "Michael H{\"u}bner", title = "Introduction to the special section on multiprocessor system-on-chip for cyber-physical systems", journal = j-TECS, volume = "12", number = "1s", pages = "46:1--46:??", month = mar, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2435227.2435242", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 19 07:54:21 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "46", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Paulin:2013:PPP, author = "Pierre G. 
Paulin and Ali Erdem {\"O}zcan and Vincent Gagn{\'e} and Bruno Lavigueur and Olivier Benny", title = "Parallel programming patterns for multi-processor {SoC}: Application to video processing", journal = j-TECS, volume = "12", number = "1s", pages = "47:1--47:??", month = mar, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2435227.2435243", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 19 07:54:21 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Efficient, scalable and productive parallel programming is a major challenge for exploiting the future multi-processor SoC platforms. This article presents the MultiFlex programming environment which has been developed to address this challenge. It is targeted for use on Platform 2012, a scalable multi-processor fabric. The MultiFlex environment supports high-level simulation, iterative platform mapping, and includes tools for programming model aware debug, trace, visualization and analysis. This article focuses on the two classes of programming abstractions supported in MultiFlex. The first is a set of Parallel Programming Patterns (PPP) which offer a rich set of programming abstractions for implementing efficient data- and task-level parallel applications. The second is a Reactive Task Management (RTM) abstraction, which offers a lightweight C-based API to support dynamic dispatching of small grain tasks on tightly coupled parallel processing resources. The use of the MultiFlex native programming model is illustrated through the capture and mapping of two representative video applications. The first is a high-quality rescaling (HQR) application on a multi-processor platform. We present the details of the optimization process which was required for mapping the HQR application, for which the reference code requires 350 GIPS (giga instructions per second), onto a 16 processor cluster. 
Our results show that the parallel implementation using the PPP model offers almost linear acceleration with respect to the number of processing elements. The second application is a high-definition VC-1 decoder. For this application, we illustrate two different parallel programming model variants, one using PPPs, the other based on RTM. These two versions are mapped onto two variants of a homogeneous version of the Platform 2012 multi-core fabric.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "47", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Thiele:2013:PTT, author = "Lothar Thiele and Lars Schor and Iuliana Bacivarov and Hoeseok Yang", title = "Predictability for timing and temperature in multiprocessor system-on-chip platforms", journal = j-TECS, volume = "12", number = "1s", pages = "48:1--48:??", month = mar, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2435227.2435244", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 19 07:54:21 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "High computational performance in multiprocessor system-on-chips (MPSoCs) is constrained by the ever-increasing power densities in integrated circuits, so that nowadays MPSoCs face various thermal issues. For instance, high chip temperatures may lead to long-term reliability concerns and short-term functional errors. Therefore, the new challenge in designing embedded real-time MPSoCs is to guarantee the final performance and correct function of the system, considering both functional and non-functional properties. One way to achieve this is by ruling out mapping alternatives that do not fulfill requirements on performance or peak temperature already in early design stages. 
In this article, we propose a thermal-aware optimization framework for mapping real-time applications onto MPSoC platforms. The performance and temperature of mapping candidates are evaluated by formal temporal and thermal analysis models. To this end, analysis models are automatically generated during design space exploration, based on the same specifications as used for software synthesis. The analysis models are automatically calibrated with performance data reflecting the execution of the system on the target platform. The data is automatically obtained prior to design space exploration based on a set of benchmark mappings. Case studies show that the performance and temperature requirements are often conflicting goals and optimizing them together leads to major benefits in terms of a guaranteed and predictable high performance.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "48", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Davare:2013:MDE, author = "Abhijit Davare and Douglas Densmore and Liangpeng Guo and Roberto Passerone and Alberto L. Sangiovanni-Vincentelli and Alena Simalatsar and Qi Zhu", title = "{metroII}: a design environment for cyber-physical systems", journal = j-TECS, volume = "12", number = "1s", pages = "49:1--49:??", month = mar, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2435227.2435245", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 19 07:54:21 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Cyber-Physical Systems are integrations of computation and physical processes and as such, will be increasingly relevant to industry and people. The complexity of designing CPS resides in their heterogeneity. 
Heterogeneity manifest itself in modeling their functionality as well as in the implementation platforms that include a multiplicity of components such as microprocessors, signal processors, peripherals, memories, sensors and actuators often integrated on a single chip or on a small package such as a multi-chip module. We need a methodology, tools and environments where heterogeneity can be dealt with at all levels of abstraction and where different tools can be integrated. We present here Platform-Based Design as the CPS methodology of choice and metroII, a design environment that supports it. We present the metamodeling approach followed in metroII, how to couple the functionality and implementation platforms of CPS, and the simulation technology that supports the analysis of CPS and of their implementation. We also present examples of use and the integration of metroII with another popular design environment developed at Verimag, BIP.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "49", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Bogdan:2013:PCH, author = "Paul Bogdan and Siddharth Jain and Radu Marculescu", title = "Pacemaker control of heart rate variability: a cyber physical system perspective", journal = j-TECS, volume = "12", number = "1s", pages = "50:1--50:??", month = mar, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2435227.2435246", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 19 07:54:21 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Cardiac diseases, like those related to abnormal heart rate activity, have an enormous economic and psychological impact worldwide. The approaches used to control the behavior of modern pacemakers ignore the fractal nature of heart rate activity. 
The purpose of this article is to present a Cyber Physical System approach to pacemaker design that exploits precisely the fractal properties of heart rate activity in order to design the pacemaker controller. Towards this end, we solve a finite horizon optimal control problem based on the heartbeat time series and show that this control problem can be converted into a system of linear equations. We also compare and contrast the performance of the fractal optimal control problem under six different cost functions. Finally, to get an idea of hardware complexity, we implement the fractal optimal controller on a Virtex4 FPGA and report some preliminary results in terms of area overhead.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "50", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Gohringer:2013:RAN, author = "Diana G{\"o}hringer and Lukas Meder and Oliver Oey and J{\"u}rgen Becker", title = "Reliable and adaptive network-on-chip architectures for cyber physical systems", journal = j-TECS, volume = "12", number = "1s", pages = "51:1--51:??", month = mar, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2435227.2435247", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 19 07:54:21 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Reliability in embedded systems is crucial for many application domains. Especially, for safety critical application, as they can be found in the automotive and avionic domain, a high reliability has to be ensured. The technology in chip production undergoes a steady shrinking process from nowadays 25 nanometers. It is proven that coming technologies, which are much smaller, can have a higher defect rate after production, but also at runtime. 
The physical effects at runtime come from a higher susceptibility for radiation. Since the silicon die of a field programmable gate array (FPGA) includes a high amount of physical wiring, the radiation effect plays here a major role. Therefore, this article describes an approach of a reliable Network-on-Chip (NoC) which can be used for an FPGA-based system. The article describes the concept and the physical realization of this NoC and evaluates its reliability.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "51", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kim:2013:SIE, author = "Jongsung Kim and Javier A. Barria and Morris Chang and Victor C. M. Leung", title = "Special issue on embedded systems for interactive multimedia services {(ES-IMS)}", journal = j-TECS, volume = "12", number = "2", pages = "19:1--19:??", month = feb, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2423636.2423637", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Feb 28 06:57:27 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "19", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chen:2013:ELS, author = "Yeong-Sheng Chen and Yun-Ju Ting and Chih-Heng Ke and Naveen Chilamkurti and Jong Hyuk Park", title = "Efficient localization scheme with ring overlapping by utilizing mobile anchors in wireless sensor networks", journal = j-TECS, volume = "12", number = "2", pages = "20:1--20:??", month = feb, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2423636.2423638", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Feb 28 06:57:27 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This study proposes an efficient localization scheme in wireless sensor networks. The proposed scheme utilizes mobile anchors and is based on ring overlapping. In a wireless sensor network, the nodes that know their locations are called reference nodes, and the other nodes that are without the knowledge of their locations are called blind nodes. To localize a certain blind node, by comparing the relative RSSI (Received Signal Strength Indicator) values among nodes, mobile beacons are utilized to find out the rings that are centered at a reference node and contain the blind node. These rings are called B-Rings. Since the mobile anchors and the reference nodes know their own locations, the B-Rings can be precisely derived. Moreover, by using multiple mobile beacons, the widths of the B-Rings can be further minimized; and then by overlapping them, the location of the blind nodes can be efficiently estimated. Most existing localization schemes that utilize mobile anchors let the mobile anchors move randomly. In contrast, the proposed scheme provides regular and simple movement mechanisms for the mobile anchors. Thus, the mobile anchors consume less energy than the other schemes, in which the mobile anchors move randomly.
Analytical analysis and simulation results show that the proposed localization mechanism can achieve better location accuracy as well as less movement length of the mobile anchor than the other existing related approaches.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "20", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Sun:2013:DEI, author = "Hung-Min Sun and Chi-Yao Weng and Shiuh-Jeng Wang and Cheng-Hsing Yang", title = "Data embedding in image-media using weight-function on modulo operations", journal = j-TECS, volume = "12", number = "2", pages = "21:1--21:??", month = feb, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2423636.2423639", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Feb 28 06:57:27 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Multimedia hiding system is to embed message behind the specified media, but it is still kept normal in media representations via human sensitive organizations without causing imperceptibility. In this article, we propose a data hiding system by means of flexible exploiting modification directions to achieve safer message concealments in image-media. In our scheme, $n$ cover-pixels are flexibly chosen on modulo operations to embed a secret $s$, where $ n = \lceil \log_3 (s) \rceil $. The varied pixel values associated with the chosen $n$ pixels are only changed among $ [ - 1, 1] $. Because the numbers of adjustable pixels are much greater than the pixels in the past scheme, our scheme is able to obtain a higher embedded ratio in response to the capacity requirements of information hiding systems. 
In addition, we also applied the statistics-steganalyzers to demonstrate that our scheme has accomplishment not only higher capacity but also kept the robustness against the blind steganalyzers.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "21", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Seo:2013:AIG, author = "Sanghyun Seo and Seungtaek Ryoo and Kyunghyun Yoon", title = "Artistic image generation for emerging multimedia services by impressionist manner", journal = j-TECS, volume = "12", number = "2", pages = "22:1--22:??", month = feb, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2423636.2423640", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Feb 28 06:57:27 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this article, we propose the rendering framework for painting-like image generation and general system architecture for mobile device. Especially, we focused on a color division method for generating neo-impressionist images. The French painter, George Seurat, introduced pointillism under the theory that the individual pigments of colors on the canvas are reconstructed on the human retina. Pointillism is a painting technique in which many small brush strokes are combined to form a picture and determines the color of brush strokes based on the optical mixing of juxtaposed colors. In order to express countless separate dots, we form hierarchical points using Wang Tiles contained points. Also palette will be constructed using neo-impressionist colors. Based on this palette, we propose color division algorithm that distributes hierarchical point's color to pointillist colors using probability function. 
Finally, hierarchical points set that applied proposed color division rule is converted into brush strokes that possesses properties such as shape and direction. This rendering algorithm is performed in our proposed system. Our scheme is able to produce a painting with artistic style and be applied to the various platform having the different computing performance and display resolution. This system also can be extended to various imaging devices (IPTV, camera, smart phone, digital photo frame and so on).", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "22", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Park:2013:EEN, author = "Sang Oh Park and Sung Jo Kim", title = "{ENFFiS}: an enhanced {NAND} flash memory file system for mobile embedded multimedia system", journal = j-TECS, volume = "12", number = "2", pages = "23:1--23:??", month = feb, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2423636.2423641", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Feb 28 06:57:27 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Since the typical erase cycle limit of a NAND flash memory's block is about 1,000,000, flash memory should be erased as evenly as possible; otherwise, file system hot spots will soon be worn out. This forces a NAND flash memory file system to scan the whole flash memory during its mount rather than saving frequently updated file system information in a fixed area. Since the mount time linearly increases with the size of NAND flash memory, boot times of embedded systems are also linearly increased. In addition, since data loss may occur if a file system terminates abnormally due to unexpected errors, a stability scheme for NAND flash memory file system is in great demand. 
To resolve these problems, this article suggests an extended logical block called Exblock (Extended Block) and a table called SNode (Snapshot Node) to reduce the mount time and proposes a new journaling scheme to improve stability for an enhanced file system for NAND flash memory storage called ENFFiS (Enhanced NAND Flash memory File System). It also proposes a new cache policy to improve read/write performances. ENFFiS shows better performance than existing file systems in terms of reading, writing, mount time and stability.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "23", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Li:2013:TAT, author = "Jiayin Li and Meikang Qiu and Jian-Wei Niu and Laurence T. Yang and Yongxin Zhu and Zhong Ming", title = "Thermal-aware task scheduling in {$3$D} chip multiprocessor with real-time constrained workloads", journal = j-TECS, volume = "12", number = "2", pages = "24:1--24:??", month = feb, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2423636.2423642", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Feb 28 06:57:27 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Chip multiprocessor (CMP) techniques have been implemented in embedded systems due to tremendous computation requirements. Three-dimension (3D) CMP architecture has been studied recently for integrating more functionalities and providing higher performance. The high temperature on chip is a critical issue for the 3D architecture. In this article, we propose an online thermal prediction model for 3D chips. Using this model, we propose novel task scheduling algorithms based on rotation scheduling to reduce the peak temperature on chip. 
We consider data dependencies, especially inter-iteration dependencies that are not well considered in most of the current thermal-aware task scheduling algorithms. Our simulation results show that our algorithms can efficiently reduce the peak temperature up to 8.1$^\circ$C.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "24", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Paul:2013:VSI, author = "Anand Paul and Bo-Wei Chen and Karunanithi Bharanitharan and Jhing-Fa Wang", title = "Video search and indexing with reinforcement agent for interactive multimedia services", journal = j-TECS, volume = "12", number = "2", pages = "25:1--25:??", month = feb, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2423636.2423643", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Feb 28 06:57:27 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this study, we present a video search and indexing system based on the state support vector (SVM) network, video graph, and reinforcement agent for recognizing and organizing video events. In order to enhance the recognition performance of the state SVM network, two innovative techniques are presented: state transition correction and transition quality estimation. The classification results are also merged into the video indexing graph, which facilitates the search speed. A reinforcement algorithm with an efficient scheduling scheme significantly reduces both the power consumption and time. The experimental results show the proposed state SVM network was able to achieve a precision rate as high as 83.83\% and the query results of the indexing graph reached 80\% accuracy. The experiments also demonstrate the performance and feasibility of our system.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "25", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Nam:2013:PAR, author = "Yunyoung Nam and Seungmin Rho and Chulung Lee", title = "Physical activity recognition using multiple sensors embedded in a wearable device", journal = j-TECS, volume = "12", number = "2", pages = "26:1--26:??", month = feb, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2423636.2423644", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Feb 28 06:57:27 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this article, we present a wearable intelligence device for activity monitoring applications. We developed and evaluated algorithms to recognize physical activities from data acquired using a 3-axis accelerometer with a single camera worn on a body. The recognition process is performed in two steps: at first the features for defining a human activity are measured by the 3-axis accelerometer sensor and the image sensor embedded in a wearable device. Then, the physical activity corresponding to the measured features is determined by applying the SVM classifier. The 3-axis accelerometer sensor computes the correlation between axes and the magnitude of the FFT for other features of an activity. Acceleration data is classified into nine activity labels. Through the image sensor, multiple optical flow vectors computed on each grid image patch are extracted as features for defining an activity. In the experiments, we showed that an overall accuracy rate of activity recognition based our method was 92.78\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "26", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lim:2013:DRS, author = "Seung-Ho Lim and Min Choi and Young Sik Jeong", title = "Data reorganization for scalable video service with embedded mobile devices", journal = j-TECS, volume = "12", number = "2", pages = "27:1--27:??", month = feb, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2423636.2423645", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Feb 28 06:57:27 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Recent development of high-speed wireless networks and embedded systems has enabled the recording and delivery of high-performance multimedia to heterogeneous mobile users. To support heterogeneous mobile users with high-quality multimedia services, scalable video coding was introduced. In the scalable video coding (SVC), through multidimensional scalability, all types of these scalability can be exploited at the same time. However, the generated video sequences of scalable video coding are not adequate for mobile multimedia service systems since its flexibility makes non contiguous storing and retrieval of partial stream data. In this article, we propose efficient scalable video data reorganization for video servicing systems, which consist of video server and mobile clients. For video server, we reorganize scalable video streams taking into account both of decoding dependency and location in disk array storage, where disk array is widely used for storage systems of video server. In the mobile devices, we place substreams with the consideration of NAND flash memory page and block boundaries, which is storage for mobile devices. 
The experimental results show that the proposed reorganization of scalable video can improve the performance of mobile multimedia service systems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "27", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kang:2013:AEC, author = "Hyeong-Ju Kang and Heesuk Seo and Jin Kwak", title = "Area-efficient convolutional deinterleaver for mobile {TV} receiver", journal = j-TECS, volume = "12", number = "2", pages = "28:1--28:??", month = feb, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2423636.2423646", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Feb 28 06:57:27 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this article, a single-pointer structure is proposed for the convolutional deinterleavers of mobile TV receivers. To enhance the burst-error correcting capability, the convolutional interleaving and deinterleaving scheme is widely used in mobile TV receivers. However, a convolutional deinterleaver requires many pointer registers. This article introduces a single-pointer structure that reduces the number of pointer registers. Experimental results show that the single-pointer structure reduces the area of the convolutional deinterleaver by 70\% in a mobile TV receiver.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "28", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Bharanitharan:2013:DMS, author = "K. 
Bharanitharan and Jiun-Ren Ding and Anand Paul and Kuen-Ming Lee and Ting-Wei Hou", title = "Dependable management system for ubiquitous camera array service in an elder-care center", journal = j-TECS, volume = "12", number = "2", pages = "29:1--29:??", month = feb, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2423636.2423647", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Feb 28 06:57:27 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The concept of smart homes (SH) has been extensively popularized, and there are a lot of technologies that need to be continuously utilized and integrated in such a concept. In this article, some applied problems of camera array (CA) in the SH are discussed and solved. Determining how to build an effective management method for CA in order to ensure that user privacy is not encroached upon is an important issue. In SH, the applications of CA are very diversified. We suggest that a satisfactory management method of CA should be based on the open service gateway initiative (OSGi) that includes resource management and monitoring (RMM) and UPnP security for the problems of resources and privacy, respectively. Finally, an applied example of CA is addressed in an elder-care center (EC). Simulation results show that the management strategy and application of CA based on an OSGi is satisfactory.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "29", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lai:2013:RBR, author = "Chin-Feng Lai and Min Chen and Meikang Qiu and Athanasios V. 
Vasilakos and Jong Hyuk Park", title = "A {RF4CE}-based remote controller with interactive graphical user interface applied to home automation system", journal = j-TECS, volume = "12", number = "2", pages = "30:1--30:??", month = feb, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2423636.2423648", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Feb 28 06:57:27 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "With the increase in commercial electronic equipment and its complicated control interfaces, how to design an effective and user-friendly control interface has become a topic for many researchers. This research introduces two-directional communication of an interactive graphical user interface on a universal remote control (URC). It is different from current URCs where users must often spend huge amounts of time setting the command codes and encoding each device. With the increase in the number of appliances that the controller needs to manage and the complicated and numerous control buttons, using such controllers often causes difficulties for users. This research employs a cross-platform with integration theories, so when a user wants to connect an appliance, both the appliance end and the controller end will build a two-directional connection through pairing over Radio Frequency for Consumer Electronics (RF4CE). After connection, the system will automatically set the communication protocol between the controller and the device. The appliance will automatically transmit its current state and service in the form of bundles to the controller, then the controller will project it onto an LCD screen. 
The controller can also show the number of appliances connected to the current position of the user, allowing the user to use one controller to control all home appliances with ease, achieving a simplified and instinctive control interface to build the integrated control environment for commercial appliances.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "30", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Waluyo:2013:MQS, author = "Agustinus Borgy Waluyo and David Taniar and Bala Srinivasan and Wenny Rahayu", title = "Mobile query services in a participatory embedded sensing environment", journal = j-TECS, volume = "12", number = "2", pages = "31:1--31:??", month = feb, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2423636.2423649", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Feb 28 06:57:27 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "A participatory mobile sensing system is designed to enable clients to voluntarily collect environmental data using embedded sensors and a mobile device while going about their daily activities. Due to the spatio-temporal nature of the data, and the significant benefits of the data to the general public, it is necessary to employ an efficient and effective query processing model for the mobile clients to access the data that can be visualized via an interactive multimedia interface. This article introduces a unified on-demand and data broadcast model to serve queries in the context of a mobile sensing system. 
The contributions of this article include the following: (i) it presents a novel data structure and indexing method to support the system; (ii) it provides flexibility for the client to issue query using on-demand or broadcast channel according to the server load and broadcast schedule; (iii) it enables new data access and processing for the mobile client; and (iv) it is designed for a multiple channels/receivers environment in a 4G wireless network. The proposed model uses a holistic query processing approach for the mobile sensing system that offers substantial efficiency and autonomy for mobile clients when retrieving data. The results of the experiments undertaken affirm the effectiveness of its performance.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "31", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kirsch:2013:ISS, author = "Christoph Kirsch and Vincent Mooney", title = "Introduction to Special Section on Probabilistic Embedded Computing", journal = j-TECS, volume = "12", number = "2s", pages = "86:1--86:??", month = may, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2465787.2465788", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 6 06:53:32 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "86", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Palem:2013:TYB, author = "Krishna Palem and Avinash Lingamneni", title = "Ten Years of Building Broken Chips: The Physics and Engineering of Inexact Computing", journal = j-TECS, volume = "12", number = "2s", pages = "87:1--87:??", month = may, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2465787.2465789", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 6 06:53:32 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Well over a decade ago, many believed that an engine of growth driving the semiconductor and computing industries---captured nicely by Gordon Moore's remarkable prophecy (Moore's law)---was speeding towards a dangerous cliff-edge. Ranging from expressions of concern to doomsday scenarios, the exact time when serious hurdles would beset us varied quite a bit---some of the more optimistic warnings giving Moore's law until. Needless to say, a lot of people have spent time and effort with great success to find ways for substantially extending the time when we would encounter the dreaded cliff-edge, if not avoiding it altogether. Faced with this issue, we started approaching this in a decidedly different manner---one which suggested falling off the metaphorical cliff as a design choice, but in a controlled way. This resulted in devices that could switch and produce bits that are correct, namely of having the intended value, only with a probabilistic guarantee. As a result, the results could in fact be incorrect. Such devices and associated circuits and computing structures are now broadly referred to as inexact designs, circuits, and architectures. 
In this article, we will crystallize the essence of inexactness dating back to 2002 through two key principles that we developed: (i) that of admitting error in a design in return for resource savings, and subsequently (ii) making resource investments in the elements of a hardware platform proportional to the value of information they compute. We will also give a broad overview of a range of inexact designs and hardware concepts that our group and other groups around the world have been developing since, based on these two principles. Despite not being deterministically precise, inexact designs can be significantly more efficient in the energy they consume, their speed of execution, and their area needs, which makes them attractive in application contexts that are resilient to error. Significantly, our development of inexactness will be contrasted against the rich backdrop of traditional approaches aimed at realizing reliable computing from unreliable elements, starting with von Neumann's influential lectures and further developed by Shannon--Weaver and others.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "87", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Misailovic:2013:PSP, author = "Sasa Misailovic and Deokhwan Kim and Martin Rinard", title = "Parallelizing Sequential Programs with Statistical Accuracy Tests", journal = j-TECS, volume = "12", number = "2s", pages = "88:1--88:??", month = may, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2465787.2465790", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 6 06:53:32 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We present QuickStep, a novel system for parallelizing sequential programs. 
Unlike standard parallelizing compilers (which are designed to preserve the semantics of the original sequential computation), QuickStep is instead designed to generate (potentially nondeterministic) parallel programs that produce acceptably accurate results acceptably often. The freedom to generate parallel programs whose output may differ (within statistical accuracy bounds) from the output of the sequential program enables a dramatic simplification of the compiler, a dramatic increase in the range of applications that it can parallelize, and a significant expansion in the range of parallel programs that it can legally generate. Results from our benchmark set of applications show that QuickStep can automatically generate acceptably accurate and efficient parallel programs---the automatically generated parallel versions of five of our six benchmark applications run between 5.0 and 7.8 times faster on eight cores than the original sequential versions. These applications and parallelizations contain features (such as the use of modern object-oriented programming constructs or desirable parallelizations with infrequent but acceptable data races) that place them inherently beyond the reach of standard approaches.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "88", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Sartori:2013:ETE, author = "John Sartori and Rakesh Kumar", title = "Exploiting Timing Error Resilience in Processor Architecture", journal = j-TECS, volume = "12", number = "2s", pages = "89:1--89:??", month = may, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2465787.2465791", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 6 06:53:32 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Escalating variations in modern CMOS designs have become a threat to Moore's law. In light of the increasing costs of standard worst-case design practices, timing speculation has become a popular approach for dealing with static and dynamic non-determinism and increasing yield. Timing speculative architectures allow conservative guardbands to be relaxed, increasing efficiency at the expense of occasional errors, which are corrected or tolerated by an error resilience mechanism. Previous work has proposed circuit- or design-level optimizations that manipulate the error rate behavior of a design to increase the efficiency of timing speculation. In this article, we investigate whether architectural optimizations can also manipulate error rate behavior to significantly increase the effectiveness of timing speculation. To this end, we demonstrate how error rate behavior indeed depends on processor architecture and that architectural optimizations can be used to manipulate the error rate behavior of a processor. Using timing speculation-aware architectural optimizations, we demonstrate enhanced overscaling and up to 29\% additional energy savings for processors that employ Razor-based timing speculation.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "89", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chippa:2013:MQV, author = "Vinay K. Chippa and Kaushik Roy and Srimat T. Chakradhar and Anand Raghunathan", title = "Managing the Quality vs. Efficiency Trade-off Using Dynamic Effort Scaling", journal = j-TECS, volume = "12", number = "2s", pages = "90:1--90:??", month = may, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2465787.2465792", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 6 06:53:32 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Several current and emerging applications do not have a unique result for a given input; rather, functional correctness is defined in terms of output quality. Recently proposed design techniques exploit the inherent resilience of such applications and achieve improved efficiency (energy or performance) by foregoing correct execution of all the constituent computations. Hardware and software systems that are thus designed may be viewed as scalable effort systems, since they offer the capability to modulate the effort that they expend towards computation, thereby allowing for trade-offs between output quality and efficiency. We propose the concept of Dynamic Effort Scaling (DES), which refers to dynamic management of the control knobs that are exposed by scalable effort systems. We argue the need for DES by observing that the degree of resilience often varies significantly across applications, across datasets, and even within a dataset. We propose a general conceptual framework for DES by formulating it as a feedback control problem, wherein the scaling mechanisms are regulated with the goal of maintaining output quality at or above a specified limit. 
We present an implementation of Dynamic Effort Scaling for recognition and mining applications and evaluate it for the support vector machines and K-means clustering algorithms under various application scenarios and datasets. Our results clearly demonstrate the benefits of the proposed approach---statically setting the scaling mechanisms leads to either significant error overshoot or significant opportunities for energy savings left on the table unexploited. In contrast, DES is able to effectively regulate the output quality while maximally exploiting the time-varying resiliency in the workload.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "90", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Salajegheh:2013:HWS, author = "Mastooreh Salajegheh and Yue Wang and Anxiao (Andrew) Jiang and Erik Learned-Miller and Kevin Fu", title = "Half-Wits: Software Techniques for Low-Voltage Probabilistic Storage on Microcontrollers with {NOR} Flash Memory", journal = j-TECS, volume = "12", number = "2s", pages = "91:1--91:??", month = may, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2465787.2465793", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 6 06:53:32 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This work analyzes the stochastic behavior of writing to embedded flash memory at voltages lower than recommended by a microcontroller's specifications in order to reduce energy consumption. Flash memory integrated within a microcontroller typically requires the entire chip to operate on a common supply voltage almost twice as much as what the CPU portion requires. Our software approach allows the flash memory to tolerate a lower supply voltage so that the CPU may operate in a more energy-efficient manner. 
Energy-efficient coding algorithms then cope with flash memory writes that behave unpredictably. Our software-only coding algorithms (in-place writes, multiple-place writes, RS-Berger codes, and slow writes) enable reliable storage at low voltages on unmodified hardware by exploiting the electrically cumulative nature of half-written data in write-once bits. For a sensor monitoring application using the MSP430, coding with in-place writes reduces the overall energy consumption by 34\%. In-place writes are competitive when the time spent on low-voltage operations such as computation is at least four times greater than the time spent on writes to flash memory. Our evaluation shows that tightly maintaining the digital abstraction for storage in embedded flash memory comes at a significant cost to energy consumption with minimal gain in reliability. We find our techniques most effective for embedded workloads that have significant duty cycling, rare writes, or energy harvesting.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "91", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Alaghi:2013:SSC, author = "Armin Alaghi and John P. Hayes", title = "Survey of Stochastic Computing", journal = j-TECS, volume = "12", number = "2s", pages = "92:1--92:??", month = may, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2465787.2465794", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 6 06:53:32 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Stochastic computing (SC) was proposed in the 1960s as a low-cost alternative to conventional binary computing. It is unique in that it represents and processes information in the form of digitized probabilities. SC employs very low-complexity arithmetic units which was a primary design concern in the past. 
Despite this advantage and also its inherent error tolerance, SC was seen as impractical because of very long computation times and relatively low accuracy. However, current technology trends tend to increase uncertainty in circuit behavior and imply a need to better understand, and perhaps exploit, probability in computation. This article surveys SC from a modern perspective where the small size, error resilience, and probabilistic features of SC may compete successfully with conventional methodologies in certain applications. First, we survey the literature and review the key concepts of stochastic number representation and circuit structure. We then describe the design of SC-based circuits and evaluate their advantages and disadvantages. Finally, we give examples of the potential applications of SC and discuss some practical problems that are yet to be solved.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "92", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lingamneni:2013:SPI, author = "Avinash Lingamneni and Christian Enz and Krishna Palem and Christian Piguet", title = "Synthesizing Parsimonious Inexact Circuits through Probabilistic Design Techniques", journal = j-TECS, volume = "12", number = "2s", pages = "93:1--93:??", month = may, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2465787.2465795", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 6 06:53:32 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The domain of inexact circuit design, in which accuracy of the circuit can be exchanged for substantial cost (energy, delay, and/or area) savings, has been gathering increasing prominence of late owing to a growing desire for reducing energy consumption of the systems, particularly in the domain of embedded and (portable) 
multimedia applications. Most of the previous approaches to realizing inexact circuits relied on scaling of circuit parameters (such as supply voltage) taking advantage of an application's error tolerance to achieve the cost and accuracy trade-offs, thus suffering from acute drawbacks of considerable implementation overheads that significantly reduced the gains. In this article, two novel design approaches called Probabilistic Pruning and Probabilistic Logic Minimization are proposed to realize inexact circuits with zero hardware overhead. Extensive simulations on various architectures of critical datapath elements demonstrate that each of the techniques can independently achieve normalized gains as large as $ 2 \times $--$ 9.5 \times $ in energy-delay-area product for relative error magnitude as low as $ 10^{-4} $--$ 10^{-8} $ \% compared to corresponding conventional correct circuits.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "93", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Cazorla:2013:PPA, author = "Francisco J. Cazorla and Eduardo Qui{\~n}ones and Tullio Vardanega and Liliana Cucu and Benoit Triquet and Guillem Bernat and Emery Berger and Jaume Abella and Franck Wartel and Michael Houston and Luca Santinelli and Leonidas Kosmidis and Code Lo and Dorin Maxim", title = "{PROARTIS}: Probabilistically Analyzable Real-Time Systems", journal = j-TECS, volume = "12", number = "2s", pages = "94:1--94:??", month = may, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2465787.2465796", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 6 06:53:32 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Static timing analysis is the state-of-the-art practice of ascertaining the timing behavior of current-generation real-time embedded systems. 
The adoption of more complex hardware to respond to the increasing demand for computing power in next-generation systems exacerbates some of the limitations of static timing analysis. In particular, the effort of acquiring (1) detailed information on the hardware to develop an accurate model of its execution latency as well as (2) knowledge of the timing behavior of the program in the presence of varying hardware conditions, such as those dependent on the history of previously executed instructions. We call these problems the timing analysis walls. In this vision-statement article, we present probabilistic timing analysis, a novel approach to the analysis of the timing behavior of next-generation real-time embedded systems. We show how probabilistic timing analysis attacks the timing analysis walls; we then illustrate the mathematical foundations on which this method is based and the challenges we face in the effort of efficiently implementing it. We also present experimental evidence that shows how probabilistic timing analysis reduces the extent of knowledge about the execution platform required to produce probabilistically accurate WCET estimations.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "94", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Abbas:2013:PTL, author = "Houssam Abbas and Georgios Fainekos and Sriram Sankaranarayanan and Franjo Ivanci{\'c} and Aarti Gupta", title = "Probabilistic Temporal Logic Falsification of Cyber-Physical Systems", journal = j-TECS, volume = "12", number = "2s", pages = "95:1--95:??", month = may, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2465787.2465797", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 6 06:53:32 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We present a Monte-Carlo optimization technique for finding system behaviors that falsify a metric temporal logic (MTL) property. Our approach performs a random walk over the space of system inputs guided by a robustness metric defined by the MTL property. Robustness is guiding the search for a falsifying behavior by exploring trajectories with smaller robustness values. The resulting testing framework can be applied to a wide class of cyber-physical systems (CPS). We show through experiments on complex system models that using our framework can help automatically falsify properties with more consistency as compared to other means, such as uniform sampling.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "95", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Forte:2013:ETA, author = "Domenic Forte and Ankur Srivastava", title = "Energy- and Thermal-Aware Video Coding via Encoder\slash Decoder Workload Balancing", journal = j-TECS, volume = "12", number = "2s", pages = "96:1--96:??", month = may, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2465787.2465798", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 6 06:53:32 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Video coding and compression are essential components of multimedia services but are known to be computationally intensive and energy demanding. Traditional video coding paradigms, predictive and distributed video coding (PVC and DVC), result in excessive computation at either the encoder (PVC) or decoder (DVC). Several recent papers have proposed a hybrid PVC/DVC codec which shares the video coding workload between encoder and decoder. In this article, we propose a controller for such hybrid coders that considers energy and temperature to dynamically split the coding workload of a system comprised of one encoder and one decoder. We also present two heuristic algorithms for determining safe operating temperatures in the controller solution: (1) stable state thermal modeling algorithm, which focuses on long term temperatures, and (2) transient thermal modeling algorithm, which is better for short-term thermal behavior. Results show that the proposed algorithms result in more balanced energy utilization, improve overall system lifetime, and reduce operating temperatures when compared to strictly PVC and DVC systems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "96", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Uzelac:2013:HBL, author = "Vladimir Uzelac and Aleksandar Milenkovi{\'c}", title = "Hardware-Based Load Value Trace Filtering for On-the-Fly Debugging", journal = j-TECS, volume = "12", number = "2s", pages = "97:1--97:??", month = may, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2465787.2465799", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jun 6 06:53:32 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Capturing program and data traces during program execution unobtrusively on-the-fly is crucial in debugging and testing of cyber-physical systems. However, tracing a complete program unobtrusively is often cost-prohibitive, requiring large on-chip trace buffers and wide trace ports. This article describes a new hardware-based load data value filtering technique called Cache First-access Tracking. Coupled with an effective variable encoding scheme, this technique achieves a significant reduction of load data value traces, from 5.86 to 56.39 times depending on the data cache size, thus enabling cost-effective, unobtrusive on-the-fly tracing and debugging.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "97", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhang:2013:SAE, author = "Fengxiang Zhang and Alan Burns", title = "Schedulability analysis of {EDF}-scheduled embedded real-time systems with resource sharing", journal = j-TECS, volume = "12", number = "3", pages = "67:1--67:??", month = mar, year = "2013", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue May 28 17:38:27 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Earliest Deadline First (EDF) is the most widely studied optimal dynamic scheduling algorithm for uniprocessor real-time systems. In the existing literature, however, there is no complete exact analysis for EDF scheduling when both resource sharing and release jitter are considered. Since resource sharing and release jitter are important characteristics of embedded real-time systems, a solid theoretical foundation should be provided for EDF scheduled systems. In this paper, we extend traditional processor demand analysis to let arbitrary deadline real-time tasks share non-preemptable resources and suffer release jitter. A complete and exact schedulability analysis for EDF scheduled systems is provided. This analysis is incorporated into QPA (Quick Processor-demand Analysis) which provides an efficient implementation of the exact test.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "67", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ben-Asher:2013:UMP, author = "Yosi Ben-Asher and Nadav Rotem", title = "Using memory profile analysis for automatic synthesis of pointers code", journal = j-TECS, volume = "12", number = "3", pages = "68:1--68:??", month = mar, year = "2013", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue May 28 17:38:27 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "One of the main advantages of high-level synthesis (HLS) is the ability to synthesize circuits that can access multiple memory banks in parallel. Current HLS systems synthesize parallel memory references based on explicit array declarations in the source code. We consider the need to synthesize not only array references but also memory operations targeting pointers and dynamic data structures. This paper describes Automatic Memory Partitioning, a method for automatically synthesizing general data structures (arrays and pointers) into multiple memory banks for increased parallelism and performance. We use source code instrumentation to collect memory traces in order to detect linear memory access patterns. The memory traces are used to split data structures into disjoint memory regions and determine which segments may benefit from parallel memory access. We present an algorithm for allocating memory segments into multiple memory banks. Experiments show significant improvements in performance while conserving the number of memory banks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "68", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhang:2013:RAB, author = "Fumin Zhang and Zhenwu Shi and Shayok Mukhopadhyay", title = "Robustness analysis for battery-supported cyber-physical systems", journal = j-TECS, volume = "12", number = "3", pages = "69:1--69:??", month = mar, year = "2013", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue May 28 17:38:27 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article establishes a novel analytical approach to quantify robustness of scheduling and battery management for battery supported cyber-physical systems. A dynamic schedulability test is introduced to determine whether tasks are schedulable within a finite time window. The test is used to measure robustness of a real-time scheduling algorithm by evaluating the strength of computing time perturbations that break schedulability at runtime. Robustness of battery management is quantified analytically by an adaptive threshold on the state of charge. The adaptive threshold significantly reduces the false alarm rate for battery management algorithms to decide when a battery needs to be replaced.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "69", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Voros:2013:MHD, author = "Nikolaos S. 
Voros and Michael H{\"u}bner and J{\"u}rgen Becker and Matthias K{\"u}hnle and Florian Thomaitiv and Arnaud Grasset and Paul Brelet and Philippe Bonnot and Fabio Campi and Eberhard Sch{\"u}ler and Henning Sahlbach and Sean Whitty and Rolf Ernst and Enrico Billich and Claudia Tischendorf and Ulrich Heinkel and Frank Ieromnimon and Dimitrios Kritharidis and Axel Schneider and Joachim Knaeblein and Wolfram Putzke-R{\"o}ming", title = "{MORPHEUS}: a heterogeneous dynamically reconfigurable platform for designing highly complex embedded systems", journal = j-TECS, volume = "12", number = "3", pages = "70:1--70:??", month = mar, year = "2013", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue May 28 17:38:27 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Recently, system designers are facing the challenge of developing systems that have diverse features, are more complex and more powerful, with less power consumption and reduced time to market. These contradictory constraints have forced technology providers to pursue design solutions that will allow design teams to meet the above design targets. In that respect, this paper introduces an innovative technology platform, called MORPHEUS, which intends to provide a complete design framework for dealing with the aforementioned challenges. MORPHEUS consists of a state of the art architecture that encompasses heterogeneous reconfigurable accelerators for implementing on the same hardware architecture applications with varying characteristics and a tool chain that, through a software oriented approach, eases the implementation of highly complex applications with heterogeneous characteristics. The proposed approach has been tested and evaluated through state of the art case studies borrowed from complementary application domains.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "70", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Crenne:2013:CMS, author = "J{\'e}r{\'e}mie Crenne and Romain Vaslin and Guy Gogniat and Jean-Philippe Diguet and Russell Tessier and Deepak Unnikrishnan", title = "Configurable memory security in embedded systems", journal = j-TECS, volume = "12", number = "3", pages = "71:1--71:??", month = mar, year = "2013", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue May 28 17:38:27 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "System security is an increasingly important design criterion for many embedded systems. These systems are often portable and more easily attacked than traditional desktop and server computing systems. Key requirements for system security include defenses against physical attacks and lightweight support in terms of area and power consumption. Our new approach to embedded system security focuses on the protection of application loading and secure application execution. During secure application loading, an encrypted application is transferred from on-board flash memory to external double data rate synchronous dynamic random access memory (DDR-SDRAM) via a microprocessor. Following application loading, the core-based security technique provides both confidentiality and authentication for data stored in a microprocessor's system memory. The benefits of our low overhead memory protection approaches are demonstrated using four applications implemented in a field-programmable gate array (FPGA) in an embedded system prototyping platform. Each application requires a collection of tasks with varying memory security requirements. The configurable security core implemented on-chip inside the FPGA with the microprocessor allows for different memory security policies for different application tasks. 
An average memory saving of 63\% is achieved for the four applications versus a uniform security approach. The lightweight circuitry included to support application loading from flash memory adds about 10\% FPGA area overhead to the processor-based system and main memory security hardware.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "71", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Liu:2013:AEE, author = "Shaoshan Liu and Richard Neil Pittman and Alessandro Forin and Jean-Luc Gaudiot", title = "Achieving energy efficiency through runtime partial reconfiguration on reconfigurable systems", journal = j-TECS, volume = "12", number = "3", pages = "72:1--72:??", month = mar, year = "2013", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue May 28 17:38:27 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "One major advantage of reconfigurable computing systems is their ability to reconfigure hardware at runtime. In this paper, we study the feasibility of achieving energy efficiency in reconfigurable computing systems (e.g., FPGAs) through runtime partial reconfiguration (PR) techniques. In the ideal scenario, we use a hardware accelerator to accelerate certain parts of the program execution; when the accelerator is not active, we use partial reconfiguration to unload it to reduce power consumption. Since the reconfiguration process may introduce a high energy overhead, it is unclear whether this approach is efficient. To approach this problem, we first analytically identify the conditions under which partial reconfiguration can reduce energy consumption. Our results indicate that the key to reduce partial reconfiguration energy overhead is to minimize the time overhead of the reconfiguration process. 
Based on this analysis, we design and implement a fast reconfiguration engine that achieves close-to-ideal throughput on Xilinx Virtex-4 FPGAs. Our fast reconfiguration engine utilizes a master-slave DMA pair to stream data between the SRAM and the Internal Configuration Access Port (ICAP). We experimentally verify our proposed solutions and compare our design to existing energy reduction techniques, such as clock gating. The results of our study show that by using partial reconfiguration to eliminate the power consumption of the accelerator when it is inactive, we can accelerate program execution and at the same time reduce the overall energy consumption by half.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "72", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Dong:2013:PRS, author = "Qi Dong and Donggang Liu and Peng Ning", title = "Providing {DoS} resistance for signature-based broadcast authentication in sensor networks", journal = j-TECS, volume = "12", number = "3", pages = "73:1--73:??", month = mar, year = "2013", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue May 28 17:38:27 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Recent studies have demonstrated that it is feasible to perform public key cryptographic operations on resource-constrained sensor platforms. However, the significant energy consumption introduced by public key operations makes any public key-based protocol an easy target of Denial-of-Service (DoS) attacks. 
For example, if digital signature schemes such as ECDSA are used directly for broadcast authentication without further protection, an attacker can simply broadcast fake messages and force the receiving nodes to perform a huge number of unnecessary signature verifications, eventually exhausting their battery power. This paper shows how to mitigate such DoS attacks when digital signatures are used for broadcast authentication in sensor networks. Specifically, this paper first presents two filtering techniques, the group-based filter and the key chain-based filter, to handle the DoS attacks against signature verification. Both methods can significantly reduce the number of unnecessary signature verifications when a sensor node is under DoS attacks. This paper then combines these two filters and proposes a hybrid solution to further improve the performance.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "73", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Falk:2013:RBQ, author = "Joachim Falk and Christian Zebelein and Christian Haubelt and J{\"u}rgen Teich", title = "A rule-based quasi-static scheduling approach for static islands in dynamic dataflow graphs", journal = j-TECS, volume = "12", number = "3", pages = "74:1--74:??", month = mar, year = "2013", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue May 28 17:38:27 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this article, an efficient rule-based clustering algorithm for static dataflow subgraphs in a dynamic dataflow graph is presented. 
The clustered static dataflow actors are quasi-statically scheduled, in such a way that the global performance in terms of latency and throughput is improved compared to a dynamically scheduled execution, while avoiding the introduction of deadlocks as generated by naive static scheduling approaches. The presented clustering algorithm outperforms previously published approaches by a faster computation and more compact representation of the derived quasi-static schedule. This is achieved by a rule-based approach, which avoids an explicit enumeration of the state space. A formal proof of the correctness of the presented clustering approach is given. Experimental results show significant improvements in both performance and code size, compared to a state-of-the-art clustering algorithm.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "74", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ost:2013:PAD, author = "Luciano Ost and Marcelo Mandelli and Gabriel Marchesan Almeida and Leandro Moller and Leandro Soares Indrusiak and Gilles Sassatelli and Pascal Benoit and Manfred Glesner and Michel Robert and Fernando Moraes", title = "Power-aware dynamic mapping heuristics for {NoC}-based {MPSoCs} using a unified model-based approach", journal = j-TECS, volume = "12", number = "3", pages = "75:1--75:??", month = mar, year = "2013", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue May 28 17:38:27 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The mapping of tasks to processing elements of an MPSoC has critical impact on system performance and energy consumption. 
To cope with complex dynamic behavior of applications, it is common to perform task mapping during runtime so that the utilization of processors and interconnect can be taken into account when deciding the allocation of each task. This paper has two major contributions, one of them targeting the general problem of evaluating dynamic mapping heuristics in NoC-based MPSoCs, and another focusing on the specific problem of finding a task mapping that optimizes energy consumption in those architectures.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "75", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Liu:2013:JVP, author = "Tiantian Liu and Chun Jason Xue and Minming Li", title = "Joint variable partitioning and bank selection instruction optimization for partitioned memory architectures", journal = j-TECS, volume = "12", number = "3", pages = "76:1--76:??", month = mar, year = "2013", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue May 28 17:38:27 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "About 55\% of all CPUs sold in the world are 8-bit microcontrollers or microprocessors which can only access limited memory space without extending address buses. Partitioned memory with bank switching is a technique to increase memory size without extending address buses. Bank Selection Instructions (BSLs) need to be inserted into the original programs to modify the bank register to point to the desired banks. These BSLs introduce both code size and execution time overheads. In this paper, we partition variables into different banks and insert BSLs at different positions of programs so that the overheads can be minimized. Minimizing speed (execution time) overhead and minimizing space (code size) overhead are two objectives investigated in this paper. 
A multi-copy approach is also proposed to store multiple copies of several variables on different banks when the memory space allows. It takes the read/write properties of variables into consideration and achieves more BSL overhead reduction. Experiments show that the proposed algorithms can reduce BSL overheads effectively compared to state-of-the-art techniques.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "76", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hu:2013:WAR, author = "Jingtong Hu and Chun Jason Xue and Qingfeng Zhuge and Wei-Che Tseng and Edwin H.-M. Sha", title = "Write activity reduction on non-volatile main memories for embedded chip multiprocessors", journal = j-TECS, volume = "12", number = "3", pages = "77:1--77:??", month = mar, year = "2013", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue May 28 17:38:27 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Recent advances in circuit and semiconductor technologies have pushed Non-Volatile Memory (NVM) technologies into a new era. These technologies exhibit appealing properties such as low power consumption, non-volatility, shock-resistivity, and high density. However, there are challenges to which we need answers in the road of applying non-volatile memories as main memory in embedded computer systems. First, when compared with DRAM, NVMs have a limited number of write/erase cycles. Second, write activities on NVM are more expensive than DRAM memory in terms of energy consumption and access latency. Both challenges will benefit from the reduction of the write activities on the NVMs. In this paper, we target embedded Chip Multiprocessors (CMPs) with Scratch Pad Memory (SPM) and non-volatile main memory. 
We introduce scheduling, data migration, and recomputation techniques to reduce the number of write activities on NVMs. Experimental results show that the proposed methods can reduce the number of writes by 58.46\% on average, which means that the NVM can last 2.8 times as long as before. For Phase Change Memory (PCM), the lifetime is extended from 2.5 years to about 7 years on average and 15 years at the most. Also, the finish time of the tested programs is reduced by an average of 38.07\%, and the energy consumption is reduced by an average of 51.23\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "77", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Baruah:2013:PST, author = "Sanjoy Baruah", title = "Partitioning sporadic task systems upon memory-constrained multiprocessors", journal = j-TECS, volume = "12", number = "3", pages = "78:1--78:??", month = mar, year = "2013", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue May 28 17:38:27 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Most prior theoretical research on real-time partitioning algorithms for multiprocessor platforms has focused on ensuring that the cumulative computing requirements of the tasks assigned to each processor do not exceed the processor's processing power. However, computing capacity is often not the only limiting resource: on many multiprocessor platforms each individual computing unit may have limited amounts of multiple additional types of resources (such as local memory) in addition to having limited processing power. 
We present algorithms for partitioning a collection of sporadic tasks, each characterized by a WCET, a relative deadline, and a period, upon a multiprocessor platform in a manner that is cognizant of such additional constraints as well as the processing capacity constraints.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "78", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Paolieri:2013:HRT, author = "Marco Paolieri and J{\"o}rg Mische and Stefan Metzlaff and Mike Gerdes and Eduardo Qui{\~n}ones and Sascha Uhrig and Theo Ungerer and Francisco J. Cazorla", title = "A hard real-time capable multi-core {SMT} processor", journal = j-TECS, volume = "12", number = "3", pages = "79:1--79:??", month = mar, year = "2013", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue May 28 17:38:27 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Hard real-time applications in safety critical domains require high performance and time analyzability. Multi-core processors are an answer to these demands, however task interferences make multi-cores more difficult to analyze from a worst-case execution time point of view than single-core processors. We propose a multi-core SMT processor that ensures a bounded maximum delay a task can suffer due to inter-task interferences. Multiple hard real-time tasks can be executed on different cores together with additional non real-time tasks. Our evaluation shows that the proposed MERASA multi-core provides predictability for hard real-time tasks and also high performance for non hard real-time tasks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "79", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Yun:2013:DHS, author = "Jeong-Han Yun and Chul-Joo Kim and Seonggun Kim and Kwang-Moo Choe and Taisook Han", title = "Detection of harmful schizophrenic statements in {Esterel}", journal = j-TECS, volume = "12", number = "3", pages = "80:1--80:??", month = mar, year = "2013", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue May 28 17:38:27 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In imperative synchronous languages, a statement is called schizophrenic if it is executed more than once in a single clock. When a schizophrenic statement is translated into a circuit, the circuit can behave abnormally because of the multiple executions. To solve the problems caused by schizophrenic statements, compilers duplicate the statements to avoid multiple executions. Esterel is an imperative synchronous language. Schizophrenic statements in Esterel are considered to occur due to the instantaneous reentrance of local signal declarations or parallel statements. However, if the corresponding circuit of a schizophrenic statement behaves normally, it is harmless and thus curing is not necessary. In this paper, we identify the conditions under which a schizophrenic statement of the Esterel program must be cured during circuit translation. We also propose an algorithm to detect schizophrenic statements that have to be cured on the control flow graphs (CFGs) of source codes. Our algorithm detects all schizophrenic statements that have to be cured and results in fewer false alarms on the benchmark programs used in the previous work. It is simple and based on the CFG of a program, implying that it can be merged into existing compilers easily.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "80", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Baek:2013:EEH, author = "Seungjae Baek and Jongmoo Choi and Donghee Lee and Sam H. Noh", title = "Energy-efficient and high-performance software architecture for storage class memory", journal = j-TECS, volume = "12", number = "3", pages = "81:1--81:??", month = mar, year = "2013", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue May 28 17:38:27 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Recently, interest in incorporating Storage Class Memory (SCM), which blurs the distinction between memory and storage, into mainstream computing has been increasing rapidly. In this paper, we address the emerging questions regarding the use of SCM. Based on an embedded platform that employs FeRAM, a type of SCM, we present our findings. In summary, by introducing SCM, power efficiency improves while performance is degraded. We also show that such performance degradations may be removed with operating system level schemes that fully exploit the characteristics of SCM. Finally, we present permanent computing that supports lightweight system on/off capabilities by using SCM.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "81", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lee:2013:HPL, author = "Dongwon Lee and Marilyn Wolf and Shuvra S. 
Bhattacharyya", title = "High-performance and low-energy buffer mapping method for multiprocessor {DSP} systems", journal = j-TECS, volume = "12", number = "3", pages = "82:1--82:??", month = mar, year = "2013", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue May 28 17:38:27 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "When implementing digital signal processing (DSP) applications onto multiprocessor systems, one significant problem in the viewpoints of performance is the memory wall. In this paper, to help alleviate the memory wall problem, we propose a novel, high-performance buffer mapping policy for SDF-represented DSP applications on bus-based multiprocessor systems that support the shared-memory programming model. The proposed policy exploits the bank concurrency of the DRAM main memory system according to the analysis of hierarchical parallelism. Energy consumption is also a critical parameter, especially in battery-based embedded computing systems. In this paper, we apply a synchronization back-off scheme on the top of the proposed high-performance buffer mapping policy to reduce energy consumption. The energy saving is attained by minimizing the number of non-essential synchronization transactions. We measure throughput and energy consumption on both synthetic and real benchmarks. The simulation results show that the proposed buffer mapping policy is very useful in terms of performance, especially in memory-intensive applications where the total execution time of computational tasks is relatively small compared to that of memory operations. In addition, the proposed synchronization back-off scheme provides a reduction in the number of synchronization transactions without degrading performance, which results in system energy saving.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "82", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Tripakis:2013:CSD, author = "Stavros Tripakis and Dai Bui and Marc Geilen and Bert Rodiers and Edward A. Lee", title = "Compositionality in synchronous data flow: Modular code generation from hierarchical {SDF} graphs", journal = j-TECS, volume = "12", number = "3", pages = "83:1--83:??", month = mar, year = "2013", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue May 28 17:38:27 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Hierarchical SDF models are not compositional: a composite SDF actor cannot be represented as an atomic SDF actor without loss of information that can lead to rate inconsistency or deadlock. Motivated by the need for incremental and modular code generation from hierarchical SDF models, we introduce in this paper DSSF profiles. DSSF (Deterministic SDF with Shared FIFOs) forms a compositional abstraction of composite actors that can be used for modular compilation. We provide algorithms for automatic synthesis of non-monolithic DSSF profiles of composite actors given DSSF profiles of their sub-actors. We show how different trade-offs can be explored when synthesizing such profiles, in terms of compactness (keeping the size of the generated DSSF profile small) versus reusability (maintaining necessary information to preserve rate consistency and deadlock-absence) as well as algorithmic complexity. We show that our method guarantees maximal reusability and report on a prototype implementation.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "83", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zimmerman:2013:MBR, author = "Andrew T. Zimmerman and Jerome P. Lynch and Frank T. 
Ferrese", title = "Market-based resource allocation for distributed data processing in wireless sensor networks", journal = j-TECS, volume = "12", number = "3", pages = "84:1--84:??", month = mar, year = "2013", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue May 28 17:38:27 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In recent years, improved wireless technologies have enabled the low-cost deployment of large numbers of sensors for a wide range of monitoring applications. Because of the computational resources (processing capability, storage capacity, etc.) collocated with each sensor in a wireless network, it is often possible to perform advanced data analysis tasks autonomously and in-network, eliminating the need for the post-processing of sensor data. With new parallel algorithms being developed for in-network computation, it has become necessary to create a framework in which all of a wireless network's scarce resources (CPU time, wireless bandwidth, storage capacity, battery power, etc.) can be best utilized in the midst of competing computational requirements. In this study, a market-based method is developed to autonomously distribute these scarce network resources across various computational tasks with competing objectives and/or resource demands. This method is experimentally validated on a network of wireless sensing prototypes, where it is shown to be capable of Pareto-optimally allocating scarce network resources. Then, it is applied to the real-world problem of rupture detection in shipboard chilled water systems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "84", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mu:2013:POS, author = "Jingqing Mu and Karthik Shankar and Roman Lysecky", title = "Profiling and online system-level performance and power estimation for dynamically adaptable embedded systems", journal = j-TECS, volume = "12", number = "3", pages = "85:1--85:??", month = mar, year = "2013", CODEN = "????", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue May 28 17:38:27 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Significant research has demonstrated the performance and power benefits of runtime dynamic reconfiguration of FPGAs and microprocessor/FPGA devices. For dynamically reconfigurable systems, in which the selection of hardware coprocessors to implement within the FPGA is determined at runtime, online estimation methods are needed to evaluate the performance and power consumption impact of the hardware coprocessor selection. In this paper, we present a profile assisted online system-level performance and power estimation framework for estimating the speedup and power consumption of dynamically reconfigurable embedded systems. We evaluate the accuracy and fidelity of our online estimation framework for dynamic hardware kernel selection to maximize performance or minimize the system power consumption.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "85", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Jafari:2013:ISS, author = "Roozbeh Jafari and John Lach and Majid Sarrafzadeh and William Kaiser", title = "Introduction to the special section on wireless health systems", journal = j-TECS, volume = "12", number = "4", pages = "98:1--98:??", month = jun, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2485984.2485986", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 1 18:28:35 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "98", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wijsman:2013:TME, author = "Jacqueline Wijsman and Bernard Grundlehner and Julien Penders and Hermie Hermens", title = "Trapezius muscle {EMG} as predictor of mental stress", journal = j-TECS, volume = "12", number = "4", pages = "99:1--99:??", month = jun, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2485984.2485987", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 1 18:28:35 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Stress is a growing problem in society and can cause musculoskeletal complaints. It would be useful to measure stress for prevention of stress-related health problems. An experiment is described in which EMG signals of the upper trapezius muscle were measured with a wireless system during three different stressful conditions: a calculation task (the Norinder test), a logical puzzle task and a memory task. The latter two tests were newly designed and aimed at creating circumstances that are similar to work stress. 
Amplitudes of the EMG signals were significantly higher during stress compared to rest (+2.6\% of reference contraction level) and relative time with EMG gaps was lower during stress (-14.3\% of time). Also, mean and median frequencies were significantly lower during stress than during rest (-8.6 and -8.8 Hz, respectively). EMG amplitude increased not only from rest to stress conditions, but also during stressful conditions and decreased during relaxation periods. EMG features correlated with subjectively indicated stress levels (correlations of 0.32 with RMS and -0.32 with relative gaptime). The results indicate that EMG is a useful parameter to detect stress. Together with other physiological sensors, EMG sensors can be included in a wireless system for ambulatory monitoring of stress levels.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "99", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wouhaybi:2013:ECM, author = "Rita H. Wouhaybi and Mark D. Yarvis and Sangita Sharma and Philip Muse and Chieh-Yih Wan and Sai Prasad and Lenitra Durham and Ritu Sahni and Robert Norton and Merlin Curry and Holly Jimison and Richard Harper and Robert A. Lowe", title = "Experiences with context management in emergency medicine", journal = j-TECS, volume = "12", number = "4", pages = "100:1--100:??", month = jun, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2485984.2485988", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 1 18:28:35 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In emergency medicine, patient care is intense and stressful, often requiring paramedics to consult with remote physicians to convey the patient's condition. 
We present a framework for context-management in telemedicine developed in collaboration between engineers, physicians, and paramedics. We describe a mobile platform and embedded wireless sensors to capture physiological and audio context into a comprehensive patient record, accessible locally and remotely. We describe a first evaluation of this technology by trained paramedics in simulated scenarios and evaluate key aspects of system performance. Early results suggest that wireless sensing can provide reliable and low latency data both locally and to remote physicians. In addition, audio context capture is a promising approach to capturing a comprehensive patient record, with a low rate of medically important errors.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "100", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Valtazanos:2013:LSS, author = "Aris Valtazanos and D. K. Arvind and Subramanian Ramamoorthy", title = "Latent space segmentation for mobile gait analysis", journal = j-TECS, volume = "12", number = "4", pages = "101:1--101:??", month = jun, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2485984.2485989", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 1 18:28:35 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "An unsupervised learning algorithm is presented for segmentation and evaluation of motion data from the on-body Orient wireless motion capture system for mobile gait analysis. The algorithm is model-free and operates on the latent space of the motion, by first aggregating all the sensor data into a single vector, and then modeling them on a low-dimensional manifold to perform segmentation. 
The proposed approach is contrasted to a basic, model-based algorithm, which operates directly on the joint angles computed by the Orient sensor devices. The latent space algorithm is shown to be capable of retrieving qualitative features of the motion even in the face of noisy or incomplete sensor readings.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "101", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Masse:2013:MWE, author = "Fabien Mass{\'e} and Martien {Van Bussel} and Aline Serteyn and Johan Arends and Julien Penders", title = "Miniaturized wireless {ECG} monitor for real-time detection of epileptic seizures", journal = j-TECS, volume = "12", number = "4", pages = "102:1--102:??", month = jun, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2485984.2485990", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 1 18:28:35 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Recent advances in miniaturization of ultra-low power components allow for more intelligent wearable health monitors. The development and evaluation of a wireless wearable electrocardiogram (ECG) monitor to detect epileptic seizures from changes in the cardiac rhythm is described. The ECG data are analyzed by embedded algorithms: a robust beat-detection algorithm combined with a real-time epileptic seizure detector. In its current implementation, the proposed prototype is 52$ \times $ 36$ \times $ 15mm$^3$, and has an autonomy of one day. Based on data collected on the first three epilepsy patients, preliminary clinical results are provided. Wireless, miniaturized and comfortable, this prototype opens new perspectives for health monitoring.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "102", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chi:2013:WNE, author = "Yu M. Chi and Patrick Ng and Gert Cauwenberghs", title = "Wireless noncontact {ECG} and {EEG} biopotential sensors", journal = j-TECS, volume = "12", number = "4", pages = "103:1--103:??", month = jun, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2485984.2485991", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 1 18:28:35 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Wearable, unobtrusive and patient friendly physiological sensors will be a key driving force in the wireless health revolution. Cardiac (ECG) and brain (EEG) signals are two important signal modalities indicative of healthy and diseased states of body and mind that directly benefit from long-term monitoring. Despite advancements in wireless and embedded electronics technology, however, ECG/EEG monitoring devices still face problems with patient compliance and comfort from the use of wet/gel electrodes. We have developed two wireless biopotential instrumentation systems using noncontact electrodes that can operate without direct skin contact and through thin layers of fabric. The first system is a general purpose replacement for traditional ECG/EEG telemetry systems and the second is a compact, fully self-contained wireless ECG tag. All of the issues relating to the design of low noise, high performance noncontact sensors are discussed along with full technical details, circuit schematics and construction techniques. 
The noncontact electrode has been integrated into both a wearable ECG chest harness as well as an EEG headband and characterized in a battery of experiments that represent potential health applications including resting ECG, exercise ECG and EEG directly against standard clinical adhesive Ag\slash AgCl electrodes. With careful design and secure mechanical harnesses the noncontact sensor is capable of approaching the quality of conventional electrodes.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "103", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Cardo:2013:ISS, author = "Jos{\'e} Flich Cardo and Maurizio Palesi", title = "Introduction to the special section on on-chip and off-chip network architectures", journal = j-TECS, volume = "12", number = "4", pages = "104:1--104:??", month = jun, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2485984.2485992", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 1 18:28:35 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "104", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Yu:2013:ANC, author = "Qiaoyan Yu and Meilin Zhang and Paul Ampadu", title = "Addressing network-on-chip router transient errors with inherent information redundancy", journal = j-TECS, volume = "12", number = "4", pages = "105:1--105:??", month = jun, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2485984.2485993", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 1 18:28:35 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We exploit the inherent information redundancy in the control path of Network-on-Chip (NoC) routers to manage transient errors, preventing packet loss and misrouting. Outputs of the routing arbitration units in NoC routers can be used to determine arbitration failures, because the valid arbitration outputs are a subset of all possible values. This feature is exploited to detect and correct logic and register errors in the router arbitration control path. The proposed method is complementary to other error management methods for NoC routers. An analytical reliability model of our method is provided, including parameters such as logic unit size, different error rates for logic gates and registers, and the location of faulty elements. Compared to triple-modular redundancy (TMR), the proposed method improves the arbiter reliability by two orders of magnitude while reducing the total area and power by 43\% and 64\%, respectively. In the presented case studies, two traffic traces from the PARSEC benchmark suite are used to evaluate the average latency and energy consumption. 
Simulations performed on a 4$ \times $ 4 NoC show that our method
                 reduces the average latency by up to 50\% and reduces
                 average energy by up to 70\% compared to other
                 methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "105",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ghiribaldi:2013:CST,
  author =       "Alberto Ghiribaldi and Daniele Ludovici and Francisco
                 Trivi{\~n}o and Alessandro Strano and Jos{\'e} Flich
                 and Jos{\'e} Luis S{\'a}nchez and Francisco Alfaro and
                 Michele Favalli and Davide Bertozzi",
  title =        "A complete self-testing and self-configuring {NoC}
                 infrastructure for cost-effective {MPSoCs}",
  journal =      j-TECS,
  volume =       "12",
  number =       "4",
  pages =        "106:1--106:??",
  month =        jun,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2485984.2485994",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 1 18:28:35 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  note =         "See comment \cite{Bishnoi:2015:BCC}.",
  abstract =     "Networks-on-chip need to survive to manufacturing
                 faults in order to sustain yield. An effective testing
                 and configuration strategy however implies two
                 opposite requirements. On one hand, a fast and
                 scalable built-in self-testing and self-diagnosis
                 procedure has to be carried out concurrently at NoC
                 switches. On the other hand, programming the NoC
                 routing mechanism to go around faulty links and
                 switches can be optimally performed by a centralized
                 controller with global network visibility. To the best
                 of our knowledge, this article proposes for the first
                 time a global network testing and configuration
                 strategy that meets the opposite requirements by means
                 of a fault-tolerant dual network architecture and a
                 fast configuration algorithm for the most common
                 failure patterns.
Experimental results report an area overhead as low as 12.5\% with
                 respect to the baseline switch architecture while
                 achieving a high degree of fault tolerance. In fact,
                 even when multiple stuck-at faults are considered, the
                 capability of fault masking by the dual network is
                 always over 80\%, and the support for multiple link
                 failures is more than 90\% in presence of two unusable
                 links in the main network with minimum set-up times.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "106",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Sem-Jacobsen:2013:ELC,
  author =       "Frank Olaf Sem-Jacobsen and Samuel Rodrigo and Tor
                 Skeie and Alessandro Strano and Davide Bertozzi",
  title =        "An efficient, low-cost routing framework for convex
                 mesh partitions to support virtualization",
  journal =      j-TECS,
  volume =       "12",
  number =       "4",
  pages =        "107:1--107:??",
  month =        jun,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2485984.2485995",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 1 18:28:35 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "At the core of an efficient chip multiprocessors (CMP)
                 is support for unicast and multicast routing, low
                 implementation costs, and the ability to isolate
                 concurrent applications with maximum utilization of
                 the CMP. We present an efficient logic-based unicast
                 and multicast routing algorithm that guarantees
                 isolation of local application traffic within any
                 near-convex region on the chip, and the algorithms to
                 recognize supported partitions and configure the cores
                 accordingly.
Evaluations show that the routing algorithm has a 57\% more compact
                 implementation than a recent multicast solution with
                 the same coverage, and it achieves 5\% higher
                 throughput with 13\% lower latency.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "107",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Seiculescu:2013:DBE,
  author =       "Ciprian Seiculescu and Dara Rahmati and Srinivasan
                 Murali and Hamid Sarbazi-Azad and Luca Benini and
                 Giovanni {De Micheli}",
  title =        "Designing best effort networks-on-chip to meet hard
                 latency constraints",
  journal =      j-TECS,
  volume =       "12",
  number =       "4",
  pages =        "108:1--108:??",
  month =        jun,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2485984.2485996",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 1 18:28:35 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Many classes of applications require Quality of
                 Service (QoS) guarantees from the system interconnect.
                 In Networks-on-Chip (NoC) QoS guarantees usually
                 translate into bandwidth and latency constraints for
                 the traffic flows and require hardware support in the
                 NoC fabric and its interfaces. In this article we
                 present a novel NoC synthesis framework to
                 automatically build networks that meet hard latency
                 constraints of end-to-end traffic streams without
                 requiring specialized hardware for the network
                 components. The hard latency constraints are met by
                 carefully designing the NoC topology and selecting the
                 appropriate routes for flow using lean best-effort
                 network components. We perform experiments on several
                 System on Chip (SoC) benchmarks.
We compared against a topology synthesis method with no support for
                 real-time constraints and we show that the proposed
                 method can produce topologies that can meet
                 significantly tighter worst case latency constraints
                 (on average 44\%). We also show that the tightest
                 worst case latency can be provided with little
                 overhead on power consumption (on average 8.5\%).",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "108",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zahavi:2013:GNL,
  author =       "Eitan Zahavi and Israel Cidon and Avinoam Kolodny",
  title =        "{Gana}: a novel low-cost conflict-free {NoC}
                 architecture",
  journal =      j-TECS,
  volume =       "12",
  number =       "4",
  pages =        "109:1--109:??",
  month =        jun,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2485984.2485997",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 1 18:28:35 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Similar to off-chip networks, current NoC
                 architectures are based on the store and forward of
                 uncoordinated end-to-end packet transmissions through
                 autonomous buffered routers. However, the monolithic
                 nature and the small physical dimensions of on chip
                 networks open up the opportunity for much more tightly
                 controlled architectures. We present GANA, a new
                 Global Arbiter NoC Architecture. In GANA, the
                 transmission of end-to-end data is timed by a global
                 arbiter in a way that avoids any queuing in the
                 network. The arbitration takes into account the
                 complete transfer of the end-to-end packets through
                 the entire network path, avoiding any intermediate
                 queuing and hop-by-hop packet arbitration.
                 Consequently, buffers and arbiters are no longer
                 required in the routers, resulting in smaller area and
                 low power consumption. It is demonstrated through
                 detailed design and synthesis that the additional area
                 of the central arbiter and the control path are
                 negligible in comparison to the provided area saving.
                 For example, an 8$ \times $ 8 GANA consumes only 16\%
                 of the area of an equivalent autonomous NoC while
                 providing a better end-to-end throughput. The
                 end-to-end performance of GANA at high network loads
                 is typically much better than in a distributed-control
                 NOC, because resource contention and queuing in the
                 network are avoided. This comes at the cost of a few
                 percentage increase in latency at light loads due to
                 the additional arbitration phase. GANA architecture
                 combines the inherent benefits of a network
                 (parallelism and spatial reuse of links) with the
                 inherent benefits of high integration (global view of
                 the system state, central control, and
                 synchronization). The scalability of GANA is evaluated
                 analytically, showing that it can be superior to
                 fully-distributed networks in systems up to a size of
                 about 100 modules manufactured in 45nm technology,
                 which can be used today as well as in the foreseeable
                 future.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "109",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Kim:2013:NCA,
  author =       "Dongki Kim and Sungjoo Yoo and Sunggu Lee",
  title =        "A network congestion-aware memory subsystem for
                 manycore",
  journal =      j-TECS,
  volume =       "12",
  number =       "4",
  pages =        "110:1--110:??",
  month =        jun,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2485984.2485998",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 1 18:28:35 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The network-on-chip (NoC) plays a crucial role in
                 memory performance due to the fact that it can handle
                 the majority of traffics from/to the DRAM memory
                 controllers. However, there has been little work on
                 the interplay between the NoC and memory controllers.
                 In this article, we address a problem called network
                 congestion-induced memory blocking and propose a novel
                 memory controller, which performs memory access
                 scheduling and network entry control in a network
                 congestion-aware manner. In case of network
                 congestion, in order to avoid performance degradation
                 due to the blocking caused by data bound for congested
                 regions in the NoC, the proposed memory controller
                 favors requests and data associated with uncongested
                 regions. In addition, in order to avoid the fairness
                 problem of such a policy, we also propose a gradual
                 method, which enables a trade-off between performance
                 (in memory utilization) and fairness (in memory access
                 latency). Experimental results show that the proposed
                 method can offer up to 1.76 to 2.99 times improvement
                 in memory utilization in the latency-tolerant
                 designs.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "110",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Sem-Jacobsen:2013:EPE,
  author =       "Frank Olaf Sem-Jacobsen and Samuel Rodrigo and
                 Alessandro Strano and Tor Skeie and Davide Bertozzi
                 and Francisco Gilabert",
  title =        "Enabling power efficiency through dynamic rerouting
                 on-chip",
  journal =      j-TECS,
  volume =       "12",
  number =       "4",
  pages =        "111:1--111:??",
  month =        jun,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2485984.2485999",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 1 18:28:35 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Networks-on-chip (NoCs) are key components in
                 many-core chip designs. Dynamic power-awareness is a
                 new challenge present in NoCs that must be efficiently
                 handled by the routing functionality as it introduces
                 irregularities in the commonly used 2-D meshes. In
                 this article, we propose a logic-based routing
                 algorithm, iFDOR, oriented towards dynamic powering
                 down one region within every application partition on
                 the chip through dynamic rerouting, with low
                 implementation costs. Results show that we can
                 successfully shutdown an arbitrary rectangular region
                 within an application partition without significant
                 impact on network performance.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "111",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Anonymous:2013:AOS,
  author =       "Anonymous",
  title =        "Abstracts: Online Supplements Volume 12, Number 1s,
                 Volume 12, Number 2s",
  journal =      j-TECS,
  volume =       "12",
  number =       "4",
  pages =        "112:1--112:??",
  month =        jun,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2485984.2499550",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 1 18:28:35 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "112",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Li:2013:SDM,
  author =       "Mo Li and Zheng Yang and Yunhao Liu",
  title =        "Sea depth measurement with restricted floating
                 sensors",
  journal =      j-TECS,
  volume =       "13",
  number =       "1",
  pages =        "1:1--1:??",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2512448",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 5 19:03:11 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Sea depth monitoring is a critical task for ensuring
                 safe operation of harbors. Traditional schemes largely
                 rely on labor-intensive work and expensive hardware.
                 This study explores the possibility of deploying
                 networked sensors on the surface of the sea, measuring
                 and reporting the sea depth of given areas. We propose
                 a Restricted Floating Sensors (RFS) model in which
                 sensor nodes are anchored to the sea bottom, floating
                 within a restricted area. Distinguished from
                 traditional stationary or mobile sensor networks, the
                 RFS network consists of sensor nodes with restricted
                 mobility.
We construct the network model and elaborate the corresponding
                 localization problem. We show that by locating such
                 RFS sensors, the sea depth can be estimated without
                 the help of any extra ranging devices. A prototype
                 system with 25 Telos sensor nodes is deployed to
                 validate this design. We also examine the efficiency
                 and scalability of this design through large-scale
                 simulations.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "1",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Anand:2013:CCS,
  author =       "Madhukar Anand and Sebastian Fischmeister and Insup
                 Lee",
  title =        "A comparison of compositional schedulability analysis
                 techniques for hierarchical real-time systems",
  journal =      j-TECS,
  volume =       "13",
  number =       "1",
  pages =        "2:1--2:??",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2501626.2501629",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 5 19:03:11 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Schedulability analysis of hierarchical real-time
                 embedded systems involves defining interfaces that
                 represent the underlying system faithfully and then
                 compositionally analyzing those interfaces. Whereas
                 commonly used abstractions, such as periodic and
                 sporadic tasks and their interfaces, are simple and
                 well studied, results for more complex and expressive
                 abstractions and interfaces based on task graphs and
                 automata are limited. One contributory factor may be
                 the hardness of compositional schedulability analysis
                 with task graphs and automata. Recently, conditional
                 task models, such as the recurring branching task
                 model, have been introduced with the goal of reaching
                 a middle ground in the trade-off between expressivity
                 and ease of analysis.
Consequently, techniques for compositional analysis with conditional
                 models have also been proposed, and each offer
                 different advantages. In this work, we revisit those
                 techniques, compare their advantages using an
                 automotive case study, and identify limitations that
                 would need to be addressed before adopting these
                 techniques for use with real-world problems.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "2",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{MartinezSantos:2013:LSA,
  author =       "Juan Carlos {Martinez Santos} and Yunsi Fei",
  title =        "Leveraging speculative architectures for runtime
                 program validation",
  journal =      j-TECS,
  volume =       "13",
  number =       "1",
  pages =        "3:1--3:??",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2512456",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 5 19:03:11 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Program execution can be tampered with by malicious
                 attackers through exploiting software vulnerabilities.
                 Changing the program behavior by compromising control
                 data and decision data has become the most serious
                 threat in computer system security. Although several
                 hardware approaches have been presented to validate
                 program execution, they either incur great hardware
                 overhead or introduce false alarms. We propose a new
                 hardware-based approach by leveraging the existing
                 speculative architectures for runtime program
                 validation. The on-chip branch target buffer (BTB) is
                 utilized as a cache of the legitimate control flow
                 transfers stored in a secure memory region. In
                 addition, the BTB is extended to store the correct
                 program path information.
At each indirect branch site, the BTB is used to validate the decision
                 history of previous conditional branches and monitor
                 the following execution path at runtime.
                 Implementation of this approach is transparent to the
                 upper operating system and programs. Thus, it is
                 applicable to legacy code. Because of good code
                 locality of the executable programs and effectiveness
                 of branch prediction, the frequency of control-flow
                 validations against the secure off-chip memory is low.
                 Our experimental results show a negligible performance
                 penalty and small storage overhead.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "3",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Hsieh:2013:TAM,
  author =       "Ang-Chih Hsieh and Tingting Hwang",
  title =        "Thermal-aware memory mapping in {$3$D} designs",
  journal =      j-TECS,
  volume =       "13",
  number =       "1",
  pages =        "4:1--4:??",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2512457",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 5 19:03:11 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "DRAM is usually used as main memory for program
                 execution. The thermal behavior of a memory block in a
                 3D SIP is affected not only by the power behavior but
                 also the heat dissipating ability of that block. The
                 power behavior of a block is related to the
                 applications run on the system, while the heat
                 dissipating ability is determined by the number of
                 tier and the position the block locates. Therefore, a
                 thermal-aware memory allocator should consider the
                 following two points. First, the allocator should
                 consider not only the power behavior of a logic block
                 but also the physical location during memory mapping
                 and second, the changing temperature of a physical
                 block during execution of programs.
In this article, we will propose a memory mapping algorithm taking into
                 consideration these two points. Our technique can be
                 classified as static thermal management to be applied
                 to embedded software designs. Experiments show that
                 for single-core systems, our method can reduce the
                 temperature of memory system by 17.1${}^\circ $C, as
                 compared to a straightforward mapping in the best
                 case, and 13.3${}^\circ $C on average. For systems
                 with four cores, the temperature reductions are
                 9.9${}^\circ $C and 11.6${}^\circ $C on average when
                 L1 cache of each core is set to 4KB and 8KB,
                 respectively.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "4",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Bai:2013:SOS,
  author =       "Ke Bai and Aviral Shrivastava",
  title =        "A software-only scheme for managing heap data on
                 limited local memory ({LLM}) multicore processors",
  journal =      j-TECS,
  volume =       "13",
  number =       "1",
  pages =        "5:1--5:??",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2501626.2501632",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 5 19:03:11 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article presents a scheme for managing heap data
                 in the local memory present in each core of a limited
                 local memory (LLM) multicore architecture. Although
                 managing heap data semi-automatically with software
                 cache is feasible, it may require modifications of
                 other thread codes. Crossthread modifications are very
                 difficult to code and debug, and will become more
                 complex and challenging as we increase the number of
                 cores. In this article, we propose an intuitive
                 programming interface, which is an automatic and
                 scalable scheme for heap data management.
Besides, for embedded applications, where the maximum heap size can be
                 profiled, we propose several optimizations on our heap
                 management to significantly decrease the library
                 overheads. Our experiments on several benchmarks from
                 MiBench executing on the Sony Playstation 3 show that
                 our scheme is natural to use, and if we know the
                 maximum size of heap data, our optimizations can
                 improve application performance by an average of
                 14\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "5",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Gu:2013:DDL,
  author =       "Ji Gu and Hui Guo and Tohru Ishihara",
  title =        "{DLIC}: Decoded loop instructions caching for
                 energy-aware embedded processors",
  journal =      j-TECS,
  volume =       "13",
  number =       "1",
  pages =        "6:1--6:??",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2512464",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 5 19:03:11 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "With the explosive proliferation of embedded systems,
                 especially through countless portable devices and
                 wireless equipment used, embedded systems have become
                 indispensable to the modern society and people's life.
                 Those devices are often battery driven. Therefore, low
                 energy consumption in embedded processors is important
                 and becomes critical in step with the system
                 complexity. The on-chip instruction cache (I-cache) is
                 usually the most energy-consuming component on the
                 processor chip due to its large size and frequent
                 access operations. To reduce such energy consumption,
                 the existing loop cache approaches use a tiny decoded
                 cache to filter the I-cache access and instruction
                 decode activity for repeated loop iterations.
However, such designs are effective for small and simple loops, and
                 only suitable for DSP kernel-like applications. They
                 are not effectual for many embedded applications where
                 complex loops are common. In this article, we propose
                 a decoded loop instruction cache (DLIC) that is small,
                 hence energy efficient, yet can capture most loops,
                 including large nested ones with branch executions, so
                 that a significant amount of I-cache accesses and
                 instruction decoding can be eradicated. The
                 experiments on a set of embedded benchmarks show that
                 our proposed DLIC scheme can reduce energy consumption
                 by up to 87\% as compared to normal cache-only design.
                 On average, 66\% energy can be saved on instruction
                 fetching and decoding, while at a performance overhead
                 of only 1.4\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "6",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Stanley-Marbell:2013:LPP,
  author =       "Phillip Stanley-Marbell",
  title =        "{L24}: Parallelism, performance, energy efficiency,
                 and cost trade-offs in future sensor platforms",
  journal =      j-TECS,
  volume =       "13",
  number =       "1",
  pages =        "7:1--7:??",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2512465",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 5 19:03:11 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Networks of sensors must process large amounts of
                 intermittently-available data in situ. This motivates
                 the investigation of means for achieving high
                 performance when required, but ultra-low-power
                 dissipation when idle. One approach to this challenge
                 is the use of embedded multiprocessor systems, leading
                 to trade-offs between parallelism, performance, energy
                 efficiency, and cost.
To evaluate these trade-offs and to gain insight for future system
                 designs, this article presents the design,
                 implementation, and evaluation of a miniature,
                 energy-scalable, 24-processor module, L24, for use in
                 embedded sensor systems. Analytic results and
                 empirical evidence motivating such embedded
                 multiprocessors is provided, and a parallel
                 fixed-point fast Fourier transform implementation is
                 presented. This application is used as a challenging
                 but realistic evaluator of the presented hardware
                 platform. Through a combination of hardware
                 measurements, instruction-level microarchitectural
                 simulation, and analytic modeling, it is demonstrated
                 that the platform provides idle power dissipation over
                 an order of magnitude lower than systems employing a
                 monolithic processor of equivalent performance, while
                 dynamic power dissipation remains competitive. Taking
                 into account both application computation and
                 interprocessor communication demands, it is shown that
                 there may exist an optimum operating voltage that
                 minimizes either time-to-solution, energy usage, or
                 the energy-delay product. This optimum operating point
                 is formulated analytically, calibrated with system
                 measurements, and evaluated for the hardware platform
                 and application presented.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "7",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{So:2013:STI,
  author =       "Won So and Alexander G. Dean",
  title =        "Software thread integration for instruction-level
                 parallelism",
  journal =      j-TECS,
  volume =       "13",
  number =       "1",
  pages =        "8:1--8:??",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2512466",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 5 19:03:11 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Multimedia applications require a significantly higher
                 level of performance than previous workloads of
                 embedded systems. They have driven digital signal
                 processor (DSP) makers to adopt high-performance
                 architectures like VLIW (Very-Long Instruction Word).
                 Despite many efforts to exploit instruction-level
                 parallelism (ILP) in the application, the speed is a
                 fraction of what it could be, limited by the
                 difficulty of finding enough independent instructions
                 to keep all of the processor's functional units busy.
                 This article proposes Software Thread Integration
                 (STI) for instruction-level parallelism. STI is a
                 software technique for interleaving multiple threads
                 of control into a single implicitly multithreaded one.
                 We use STI to improve the performance on ILP
                 processors by merging parallel procedures into one,
                 increasing the compiler's scope and hence allowing it
                 to create a more efficient instruction schedule.
                 Assuming the parallel procedures are given, we define
                 a methodology for finding the best performing
                 integrated procedure with a minimum compilation time.
                 We quantitatively estimate the performance impact of
                 integration, allowing various integration scenarios to
                 be compared and ranked via profitability analysis.
                 During integration of threads, different ILP-improving
                 code transformations are selectively applied according
                 to the control structure and the ILP characteristics
                 of the code, driven by interactions with software
                 pipelining.
The estimated profitability is verified and corrected by an iterative
                 compilation approach, compensating for possible
                 estimation inaccuracy. Our modeling methods combined
                 with limited compilation quickly find the best
                 integration scenario without requiring exhaustive
                 integration.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "8",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ghasemzadeh:2013:ULP,
  author =       "Hassan Ghasemzadeh and Roozbeh Jafari",
  title =        "Ultra low-power signal processing in wearable
                 monitoring systems: a tiered screening architecture
                 with optimal bit resolution",
  journal =      j-TECS,
  volume =       "13",
  number =       "1",
  pages =        "9:1--9:??",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2501626.2501636",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 5 19:03:11 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Advances in technology have led to the development of
                 wearable sensing, computing, and communication devices
                 that can be woven into the physical environment of our
                 daily lives, enabling a large variety of new
                 applications in several domains, including wellness
                 and health care. Despite their tremendous potential to
                 impact our lives, wearable health monitoring systems
                 face a number of hurdles to become a reality. The
                 enabling processors and architectures demand a large
                 amount of energy, requiring sizable batteries. In this
                 article, we propose a granular decision-making
                 architecture for physical movement monitoring
                 applications. The module can be viewed as a tiered
                 wake-up circuitry. This decision-making module, in
                 combination with a low-power microcontroller, allows
                 for significant power saving through an ultra
                 low-power processing architecture.
The significant power saving is achieved by performing a preliminary
                 ultra low-power signal processing, and hence, keeping
                 the microcontroller off when the incoming signal is
                 not of interest. The preliminary signal processing is
                 performed by a set of special-purpose functional
                 units, also called screening blocks, that implement
                 template matching functions. We formulate and solve an
                 optimization problem for selecting screening blocks
                 such that the accuracy requirements of the signal
                 processing are accommodated while the total power is
                 minimized. Our experimental results on real data from
                 wearable motion sensors show that the proposed
                 algorithm achieves 63.2\% energy saving while
                 maintaining a sensitivity of 94.3\% in recognizing
                 transitional actions.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "9",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chang:2013:RED,
  author =       "Yuan-Hao Chang and Ming-Chang Yang and Tei-Wei Kuo and
                 Ren-Hung Hwang",
  title =        "A reliability enhancement design under the flash
                 translation layer for {MLC}-based flash-memory storage
                 systems",
  journal =      j-TECS,
  volume =       "13",
  number =       "1",
  pages =        "10:1--10:??",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2512467",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 5 19:03:11 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Although flash memory has gained very strong momentum
                 in the storage market, the reliability of flash-memory
                 chips has been dropped significantly in the past
                 years.
This article presents a reliability enhancement design under the flash management layer (i.e., flash translation layer) to address this concern so as to reduce the design complexity of flash-memory management software/firmware and to improve the maintainability and portability of existing and future products. In particular, a log-based write strategy with a hash-based caching policy is proposed to provide extra ECC redundancy and performance improvement. Strategies for bad block management are also presented. The failure rate of flash-memory storage systems is analyzed with the considerations of bit errors. The proposed design is later evaluated by a series of experiments based on realistic traces. It was shown that the proposed approach could significantly improve the reliability of flash memory with very limited system overheads.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "10", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chao:2013:TLA, author = "Chih-Hao Chao and Kun-Chih Chen and Tsu-Chu Yin and Shu-Yen Lin and An-Yeu (Andy) Wu", title = "Transport-layer-assisted routing for runtime thermal management of {$3$D} {NoC} systems", journal = j-TECS, volume = "13", number = "1", pages = "11:1--11:??", month = aug, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2512468", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Sep 5 19:03:11 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "To ensure thermal safety and to avoid performance degradation from temperature regulation in 3D NoC, we propose a new temperature-traffic control framework. The framework contains the vertical throttling-based runtime thermal management (VT-RTM) scheme and the transport-layer assisted routing (TLAR) scheme. 
VT-RTM scheme increases the cooling speed and maintains high availability. TLAR scheme sustains the throughput of the nonstationary irregular mesh network. In our experiments, VT-RTM scheme reduces cooling time by 84\% and achieves 98\% network availability; the overall performance impact is around 8\% of traditional schemes. TLAR scheme reduces average latency by 35\% and improves sustainable throughput by 76\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "11", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kent:2013:CPS, author = "Christopher G. Kent and Joann M. Paul", title = "Contextual partitioning for speech recognition", journal = j-TECS, volume = "13", number = "1", pages = "12:1--12:??", month = aug, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2501626.2501639", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Sep 5 19:03:11 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Many multicore computers are single-user devices, creating the potential to partition by situational usage contexts, similar to how the human brain is organized. Contextual partitioning (CP) permits multiple simplified versions of the same task to exist in parallel, with selection tied to the context in use. We introduce CP for speech recognition, specifically targeted at user interfaces in handheld embedded devices. Contexts are drawn from webpage interactions. CP results in 61\% fewer decoding errors, 97\% less training for vocabulary changes, near-linear scaling potential with increasing core counts, and up to a potential 90\% reduction in power usage.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "12", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kim:2013:DER, author = "Sunwoo Kim and Won Seob Jeong and Won W. Ro and Jean-Luc Gaudiot", title = "Design and evaluation of random linear network coding accelerators on {FPGAs}", journal = j-TECS, volume = "13", number = "1", pages = "13:1--13:??", month = aug, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2512469", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Sep 5 19:03:11 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Network coding is a well-known technique used to enhance network throughput and reliability by applying special coding to data packets. One critical problem in practice, when using the random linear network coding technique, is the high computational overhead. More specifically, using this technique in embedded systems with low computational power might cause serious delays due to the complex Galois field operations and matrix handling. To this end, this article proposes a high-performance decoding logic for random linear network coding using field-programmable gate-array (FPGA) technology. We expect that the inherent reconfigurability of FPGAs will provide sufficient performance as well as programmability to cope with changes in the specification of the coding. The main design motivation was to improve the decoding delay by dividing and parallelizing the entire decoding process. Fast arithmetic operations are achieved by the proposed parallelized GF ALUs, which allow calculations with all the elements of a single row of a matrix to be performed concurrently. To improve the flexibility in the utilization of the FPGA components, two different decoding methods have been designed and compared. 
The performance of the proposed idea is evaluated by comparing with the performance of the decoding process executed by general-purpose processors through an equivalent software algorithm. Overall, a maximum throughput of 65.98 Mbps is achieved with the proposed FPGA design on an XC5VLX110T Virtex 5 device. In addition, the proposed design provides speedups of up to 13.84 compared to an aggressively parallelized software decoding algorithm run on a quad-core AMD processor. Moreover, the design affords 12 times higher power efficiency in terms of throughput per watt than an ARM Cortex-A9 processor.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "13", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Beg:2013:CPA, author = "Mirza Beg and Peter van Beek", title = "A constraint programming approach for integrated spatial and temporal scheduling for clustered architectures", journal = j-TECS, volume = "13", number = "1", pages = "14:1--14:??", month = aug, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2512470", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Sep 5 19:03:11 MDT 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Many embedded processors use clustering to scale up instruction-level parallelism in a cost-effective manner. In a clustered architecture, the registers and functional units are partitioned into smaller units and clusters communicate through register-to-register copy operations. Texas Instruments, for example, has a series of architectures for embedded processors which are clustered. Such an architecture places a heavier burden on the compiler, which must now assign instructions to clusters (spatial scheduling), assign instructions to cycles (temporal scheduling), and schedule copy operations to move data between clusters. 
We consider instruction scheduling of local blocks of code on clustered architectures to improve performance. Scheduling for space and time is known to be a hard problem. Previous work has proposed greedy approaches based on list scheduling to simultaneously perform spatial and temporal scheduling and phased approaches based on first partitioning a block of code to do spatial assignment and then performing temporal scheduling. Greedy approaches risk making mistakes that are then costly to recover from, and partitioning approaches suffer from the well-known phase ordering problem. In this article, we present a constraint programming approach for scheduling instructions on clustered architectures. We employ a problem decomposition technique that solves spatial and temporal scheduling in an integrated manner. We analyze the effect of different hardware parameters---such as the number of clusters, issue-width, and intercluster communication cost---on application performance. We found that our approach was able to achieve an improvement of up to 26\%, on average, over a state-of-the-art technique on superblocks from SPEC 2000 benchmarks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "14", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Brisk:2013:ISI, author = "Philip Brisk and Tulika Mitra", title = "Introduction to the special issue on application-specific processors", journal = j-TECS, volume = "13", number = "2", pages = "15:1--15:??", month = sep, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2514641.2514642", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Sep 27 18:13:13 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "15", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Vyas:2013:HAS, author = "Sudhanshu Vyas and Adwait Gupte and Christopher D. Gill and Ron K. Cytron and Joseph Zambreno and Phillip H. Jones", title = "Hardware architectural support for control systems and sensor processing", journal = j-TECS, volume = "13", number = "2", pages = "16:1--16:??", month = sep, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2514641.2514643", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Sep 27 18:13:13 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The field of modern control theory and the systems used to implement these controls have shown rapid development over the last 50 years. It was often the case that those developing control algorithms could assume the computing medium was solely dedicated to the task of controlling a plant, for example, the control algorithm being implemented in software on a dedicated Digital Signal Processor (DSP), or implemented in hardware using a simple dedicated Programmable Logic Device (PLD). As time progressed, the drive to place more system functionality in a single component (reducing power, cost, and increasing reliability) has made this assumption less often true. Thus, it has been pointed out by some experts in the field of control theory (e.g., Astrom) that those developing control algorithms must take into account the effects of running their algorithms on systems that will be shared with other tasks. One aspect of the work presented in this article is a hardware architecture that allows control developers to maintain this simplifying assumption. We focus specifically on the Proportional-Integral-Derivative (PID) controller. 
An on-chip coprocessor has been implemented that can scale to support servicing hundreds of plants, while maintaining microsecond-level response times, tight deterministic control loop timing, and allowing the main processor to service noncontrol tasks. In order to control a plant, the controller needs information about the plant's state. Typically this information is obtained from sensors with which the plant has been instrumented. There are a number of common computations that may be performed on this sensor data before being presented to the controller (e.g., averaging and thresholding). Thus in addition to supporting PID algorithms, we have developed a Sensor Processing Unit (SPU) that off-loads these common sensor processing tasks from the main processor. We have prototyped our ideas using Field Programmable Gate Array (FPGA) technology. Through our experimental results, we show our PID execution unit gives orders of magnitude improvement in response time when servicing many plants, as compared to a standard general software implementation. We also show that the SPU scales much better than a general software implementation. In addition, these execution units allow the simplifying assumption of dedicated computing medium to hold for control algorithm development.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "16", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Beldianu:2013:MBV, author = "Spiridon F. Beldianu and Sotirios G. 
Ziavras", title = "Multicore-based vector coprocessor sharing for performance and energy gains", journal = j-TECS, volume = "13", number = "2", pages = "17:1--17:??", month = sep, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2514641.2514644", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Sep 27 18:13:13 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "For most of the applications that make use of a dedicated vector coprocessor, its resources are not highly utilized due to the lack of sustained data parallelism which often occurs due to vector-length variations in dynamic environments. The motivation of our work stems from: (a) the mandate for multicore designs to make efficient use of on-chip resources for low power and high performance; (b) the omnipresence of vector operations in high-performance scientific and emerging embedded applications; (c) the need to often handle a variety of vector sizes; and (d) vector kernels in application suites may have diverse computation needs. We present a robust design framework for vector coprocessor sharing in multicore environments that maximizes vector unit utilization and performance at substantially reduced energy costs. For our adaptive vector unit, which is attached to multiple cores, we propose three basic shared working policies that enforce coarse-grain, fine-grain, and vector-lane sharing. We benchmark these vector coprocessor sharing policies for a dual-core system and evaluate them using the floating-point performance, resource utilization, and power/energy consumption metrics. Benchmarking for FIR filtering, FFT, matrix multiplication, and LU factorization shows that these coprocessor sharing policies yield high utilization and performance with low energy costs. 
The proposed policies provide 1.2--2 speedups and reduce the energy needs by about 50\% as compared to a system having a single core with an attached vector coprocessor. With the performance expressed in clock cycles, the sharing policies demonstrate 3.62--7.92 speedups compared to optimized Xeon runs. We also introduce performance and empirical power models that can be used by the runtime system to estimate the effectiveness of each policy in a hybrid system that can simultaneously implement this suite of shared coprocessor policies.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "17", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Jungeblut:2013:SAO, author = "Thorsten Jungeblut and Boris H{\"u}bener and Mario Porrmann and Ulrich R{\"u}ckert", title = "A systematic approach for optimized bypass configurations for application-specific embedded processors", journal = j-TECS, volume = "13", number = "2", pages = "18:1--18:??", month = sep, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2514641.2514645", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Sep 27 18:13:13 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The diversity of today's mobile applications requires embedded processor cores with a high resource efficiency, that means, the devices should provide a high performance at low area requirements and power consumption. The fine-grained parallelism supported by multiple functional units of VLIW architectures offers a high throughput at reasonable low clock frequencies compared to single-core RISC processors. To efficiently utilize the processor pipeline, common system architectures have to cope with data hazards due to data dependencies between consecutive operations. 
On the one hand, such hazards can be resolved by complex forwarding circuits (i.e., a pipeline bypass) which forward intermediate results to a subsequent instruction. On the other hand, the pipeline bypass can strongly affect or even dominate the total resource requirements and degrade the maximum clock frequency. In this work the CoreVA VLIW architecture is used for the development and the analysis of application-specific bypass configurations. It is shown that many paths of a comprehensive bypass system are rarely used and may not be required for certain applications. For this reason, several strategies have been implemented to enhance the efficiency of the total system by introducing application-specific bypass configurations. The configuration can be carried out statically by only implementing required paths or at runtime by dynamically reconfiguring the hardware. An algorithm is proposed which derives an optimized configuration by iteratively disabling single bypass paths. The adaptation of these application-specific bypass configurations allows for a reduction of the critical path by 26\%. As a result, the execution time and energy requirements could be reduced by up to 21.5\%. Using Dynamic Frequency Scaling (DFS) and dynamic deactivation/reactivation of bypass paths allows for a runtime reconfiguration of the bypass system. This ensures the highest efficiency while processing varying applications.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "18", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Theodoropoulos:2013:CAM, author = "Dimitris Theodoropoulos and Georgi Kuzmanov and Georgi Gaydadjiev", title = "Custom architecture for multicore audio beamforming systems", journal = j-TECS, volume = "13", number = "2", pages = "19:1--19:??", month = sep, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2514641.2514646", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Sep 27 18:13:13 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The audio Beamforming (BF) technique utilizes microphone arrays to extract acoustic sources recorded in a noisy environment. In this article, we propose a new approach for rapid development of multicore BF systems. Research on literature reveals that the majority of such experimental and commercial audio systems are based on desktop PCs, due to their high-level programming support and potential of rapid system development. However, these approaches introduce performance bottlenecks, excessive power consumption, and increased overall cost. Systems based on DSPs require very low power, but their performance is still limited. Custom hardware solutions alleviate the aforementioned drawbacks, however, designers primarily focus on performance optimization without providing a high-level interface for system control and test. In order to address the aforementioned problems, we propose a custom platform-independent architecture for reconfigurable audio BF systems. To evaluate our proposal, we implement our architecture as a heterogeneous multicore reconfigurable processor and map it onto FPGAs. Our approach combines the software flexibility of General-Purpose Processors (GPPs) with the computational power of multicore platforms. 
In order to evaluate our system we compare it against a BF software application implemented to a low-power Atom 330, a middle-ranged Core2 Duo, and a high-end Core i3. Experimental results suggest that our proposed solution can extract up to 16 audio sources in real time under a 16-microphone setup. In contrast, under the same setup, the Atom 330 cannot extract any audio sources in real time, while the Core2 Duo and the Core i3 can process in real time only up to 4 and 6 sources respectively. Furthermore, a Virtex4-based BF system consumes more than an order less energy compared to the aforementioned GPP-based approaches.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "19", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mariani:2013:DSE, author = "Giovanni Mariani and Gianluca Palermo and Vittorio Zaccaria and Cristina Silvano", title = "Design-space exploration and runtime resource management for multicores", journal = j-TECS, volume = "13", number = "2", pages = "20:1--20:??", month = sep, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2514641.2514647", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Sep 27 18:13:13 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Application-specific multicore architectures are usually designed by using a configurable platform in which a set of parameters can be tuned to find the best trade-off in terms of the selected figures of merit (such as energy, delay, and area). This multi-objective optimization phase is called Design-Space Exploration (DSE). Among the design-time (hardware) configurable parameters we can find the memory subsystem configuration (such as cache size and associativity) and other architectural parameters such as the instruction-level parallelism of the system processors. 
Among the runtime (software) configurable parameters we can find the degree of task-level parallelism associated with each application running on the platform. The contribution of this article is twofold; first, we introduce an evolutionary (NSGA-II-based) methodology for identifying a hardware configuration which is robust with respect to applications and corresponding datasets. Second, we introduce a novel runtime heuristic that exploits design-time identified operating points to provide guaranteed throughput to each application. Experimental results show that the design-time/runtime combined approach improves the runtime performance of the system with respect to existing reference techniques, while meeting the overall power budget.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "20", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kim:2013:MPE, author = "Yooseong Kim and Aviral Shrivastava", title = "Memory performance estimation of {CUDA} programs", journal = j-TECS, volume = "13", number = "2", pages = "21:1--21:??", month = sep, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2514641.2514648", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Sep 27 18:13:13 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "CUDA has successfully popularized GPU computing, and GPGPU applications are now used in various embedded systems. The CUDA programming model provides a simple interface to program on GPUs, but tuning GPGPU applications for high performance is still quite challenging. Programmers need to consider numerous architectural details, and small changes in source code, especially on the memory access pattern, can affect performance significantly. This makes it very difficult to optimize CUDA programs. 
This article presents CuMAPz, which is a tool to analyze and compare the memory performance of CUDA programs. CuMAPz can help programmers explore different ways of using shared and global memories, and optimize their program for efficient memory behavior. CuMAPz models several memory-performance-related factors: data reuse, global memory access coalescing, global memory latency hiding, shared memory bank conflict, channel skew, and branch divergence. Experimental results show that CuMAPz can accurately estimate performance with correlation coefficient of 0.96. By using CuMAPz to explore the memory access design space, we could improve the performance of our benchmarks by 30\% more than the previous approach [Hong and Kim 2010].", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "21", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Stamoulias:2013:PAK, author = "Ioannis Stamoulias and Elias S. Manolakos", title = "Parallel architectures for the {kNN} classifier --- design of soft {IP} cores and {FPGA} implementations", journal = j-TECS, volume = "13", number = "2", pages = "22:1--22:??", month = sep, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2514641.2514649", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Sep 27 18:13:13 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We designed a variety of k-nearest-neighbor parallel architectures for FPGAs in the form of parameterizable soft IP cores. We show that they can be used to solve large classification problems with thousands of training vectors, or thousands of vector dimensions using a single FPGA, and achieve very high throughput. 
They can be used to flexibly synthesize architectures that also cover: 1NN classification (vector quantization), multishot queries (with different $k$), LOOCV cross-validation, and compare favorably to GPU implementations. To the best of our knowledge this is the first attempt to design flexible IP cores for the popular kNN classifier.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "22", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Huang:2013:ASP, author = "Chen Huang and Frank Vahid and Tony Givargis", title = "Automatic synthesis of physical system differential equation models to a custom network of general processing elements on {FPGAs}", journal = j-TECS, volume = "13", number = "2", pages = "23:1--23:??", month = sep, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2514641.2514650", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Sep 27 18:13:13 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Fast execution of physical system models has various uses, such as simulating physical phenomena or real-time testing of medical equipment. Physical system models commonly consist of thousands of differential equations. Solving such equations using software on microprocessor devices may be slow. Several past efforts implement such models as parallel circuits on special computing devices called Field-Programmable Gate Arrays (FPGAs), demonstrating large speedups due to the excellent match between the massive fine-grained local communication parallelism common in physical models and the fine-grained parallel compute elements and local connectivity of FPGAs. However, past implementation efforts were mostly manual or ad hoc. We present the first method for automatically converting a set of ordinary differential equations into circuits on FPGAs. 
The method uses a general Processing Element (PE) that we developed, designed to quickly solve a set of ordinary differential equations while using few FPGA resources. The method instantiates a network of general PEs, partitions equations among the PEs to minimize communication, generates each PE's custom program, creates custom connections among PEs, and maintains synchronization of all PEs in the network. Our experiments show that the method generates a 400-PE network on a commercial FPGA that executes four different models on average $ 15 \times $ faster than a 3 GHz Intel processor, $ 30 \times $ faster than a commercial 4-core ARM, $ 14 \times $ faster than a commercial 6-core Texas Instruments digital signal processor, and $ 4.4 \times $ faster than an NVIDIA 336-core graphics processing unit. We also show that the FPGA-based approach is reasonably cost effective compared to using the other platforms. The method yields $ 2.1 \times $ faster circuits than a commercial high-level synthesis tool that uses the traditional method for converting behavior to circuits, while using $ 2 \times $ fewer lookup tables, $ 2 \times $ fewer hardcore multiplier (DSP) units, though $ 3.5 \times $ more block RAM due to being programmable. Furthermore, the method does not just generate a single fastest design, but generates a range of designs that trade off size and performance, by using different numbers of PEs.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "23", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Canis:2013:LOS, author = "Andrew Canis and Jongsok Choi and Mark Aldham and Victor Zhang and Ahmed Kammoona and Tomasz Czajkowski and Stephen D. Brown and Jason H. 
Anderson", title = "{LegUp}: an open-source high-level synthesis tool for {FPGA}-based processor\slash accelerator systems", journal = j-TECS, volume = "13", number = "2", pages = "24:1--24:??", month = sep, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2514740", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Sep 27 18:13:13 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "It is generally accepted that a custom hardware implementation of a set of computations will provide superior speed and energy efficiency relative to a software implementation. However, the cost and difficulty of hardware design is often prohibitive, and consequently, a software approach is used for most applications. In this article, we introduce a new high-level synthesis tool called LegUp that allows software techniques to be used for hardware design. LegUp accepts a standard C program as input and automatically compiles the program to a hybrid architecture containing an FPGA-based MIPS soft processor and custom hardware accelerators that communicate through a standard bus interface. In the hybrid processor/accelerator architecture, program segments that are unsuitable for hardware implementation can execute in software on the processor. LegUp can synthesize most of the C language to hardware, including fixed-sized multidimensional arrays, structs, global variables, and pointer arithmetic. Results show that the tool produces hardware solutions of comparable quality to a commercial high-level synthesis tool. We also give results demonstrating the ability of the tool to explore the hardware/software codesign space by varying the amount of a program that runs in software versus hardware. 
LegUp, along with a set of benchmark C programs, is open source and freely downloadable, providing a powerful platform that can be leveraged for new research on a wide range of high-level synthesis topics.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "24", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Papakonstantinou:2013:ECC, author = "Alexandros Papakonstantinou and Karthik Gururaj and John A. Stratton and Deming Chen and Jason Cong and Wen-Mei W. Hwu", title = "Efficient compilation of {CUDA} kernels for high-performance computing on {FPGAs}", journal = j-TECS, volume = "13", number = "2", pages = "25:1--25:??", month = sep, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2514641.2514652", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Sep 27 18:13:13 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The rise of multicore architectures across all computing domains has opened the door to heterogeneous multiprocessors, where processors of different compute characteristics can be combined to effectively boost the performance per watt of different application kernels. GPUs, in particular, are becoming very popular for speeding up compute-intensive kernels of scientific, imaging, and simulation applications. New programming models that facilitate parallel processing on heterogeneous systems containing GPUs are spreading rapidly in the computing community. By leveraging these investments, the developers of other accelerators have an opportunity to significantly reduce the programming effort by supporting those accelerator models already gaining popularity. 
In this work, we adapt one such language, the CUDA programming model, into a new FPGA design flow called FCUDA, which efficiently maps the coarse- and fine-grained parallelism exposed in CUDA onto the reconfigurable fabric. Our CUDA-to-FPGA flow employs AutoPilot, an advanced high-level synthesis tool (available from Xilinx) which enables high-abstraction FPGA programming. FCUDA is based on a source-to-source compilation that transforms the SIMT (Single Instruction, Multiple Thread) CUDA code into task-level parallel C code for AutoPilot. We describe the details of our CUDA-to-FPGA flow and demonstrate the highly competitive performance of the resulting customized FPGA multicore accelerators. To the best of our knowledge, this is the first CUDA-to-FPGA flow to demonstrate the applicability and potential advantage of using the CUDA programming model for high-performance computing in FPGAs.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "25", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Editors:2013:ISS, author = "{Editors}", title = "Introduction to the special section on {ESTIMedia'10}", journal = j-TECS, volume = "13", number = "1s", pages = "26:1--26:??", month = nov, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2536747.2536748", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Dec 9 11:30:05 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "26", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Jia:2013:SLI, author = "Zai Jian Jia and Tom{\'a}s Bautista and Antonio N{\'u}{\~n}ez and Andy D. 
Pimentel and Mark Thompson", title = "A system-level infrastructure for multidimensional {MP-SoC} design space co-exploration", journal = j-TECS, volume = "13", number = "1s", pages = "27:1--27:??", month = nov, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2536747.2536749", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Dec 9 11:30:05 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this article, we present a flexible and extensible system-level MP-SoC design space exploration (DSE) infrastructure, called NASA. This highly modular framework uses well-defined interfaces to easily integrate different system-level simulation tools as well as different combinations of search strategies in a simple plug-and-play fashion. Moreover, NASA deploys a so-called dimension-oriented DSE approach, allowing designers to configure the appropriate number of, well-tuned and possibly different, search algorithms to simultaneously co-explore the various design space dimensions. As a result, NASA provides a flexible and re-usable framework for the systematic exploration of the multidimensional MP-SoC design space, starting from a set of relatively simple user specifications. To demonstrate the capabilities of the NASA framework and to illustrate its distinct aspects, we also present several DSE experiments in which, for example, we compare NASA configurations using a single search algorithm for all design space dimensions to configurations using a separate search algorithm per dimension. These proof-of-concept experiments indicate that the latter multidimensional co-exploration can find better design points and evaluates a higher diversity of design alternatives as compared to the more traditional approach of using a single search algorithm for all dimensions.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "27", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Nadezhkin:2013:AGP, author = "Dmitry Nadezhkin and Hristo Nikolov and Todor Stefanov", title = "Automated generation of polyhedral process networks from affine nested-loop programs with dynamic loop bounds", journal = j-TECS, volume = "13", number = "1s", pages = "28:1--28:??", month = nov, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2536747.2536750", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Dec 9 11:30:05 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The Process Networks (PNs) is a suitable parallel model of computation (MoC) used to specify embedded streaming applications in a parallel form facilitating the efficient mapping onto embedded parallel execution platforms. Unfortunately, specifying an application using a parallel MoC is a very difficult and highly error-prone task. To overcome the associated difficulties, we have developed the pn compiler, which derives specific Polyhedral Process Networks (PPN) parallel specifications from sequential static affine nested loop programs (SANLPs). However, there are many applications, for example, multimedia applications (MPEG coders/decoders, smart cameras, etc.) that have adaptive and dynamic behavior which cannot be expressed as SANLPs. Therefore, in order to handle dynamic multimedia applications, in this article we address the important question whether we can relax some of the restrictions of the SANLPs while keeping the ability to perform compile-time analysis and to derive PPNs. Achieving this would significantly extend the range of applications that can be parallelized in an automated way. 
The main contribution of this article is a first approach for automated translation of affine nested loop programs with dynamic loop bounds into input-output equivalent Polyhedral Process Networks. In addition, we present a method for analyzing the execution overhead introduced in the PPNs derived from programs with dynamic loop bounds. The presented automated translation approach has been evaluated by deriving a PPN parallel specification from a real-life application called Low Speed Obstacle Detection (LSOD) used in the smart cameras domain. By executing the derived PPN, we have obtained results which indicate that the approach we present in this article facilitates efficient parallel implementations of sequential nested loop programs with dynamic loop bounds. That is, our approach reveals the possible parallelism available in such applications, which allows for the utilization of multiple cores in an efficient way.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "28", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wu:2013:AMC, author = "Yulei Wu and Geyong Min and Dakai Zhu and Laurence T. Yang", title = "An analytical model for on-chip interconnects in multimedia embedded systems", journal = j-TECS, volume = "13", number = "1s", pages = "29:1--29:??", month = nov, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2536747.2536751", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Dec 9 11:30:05 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The traffic pattern has significant impact on the performance of network-on-chip. Many recent studies have shown that multimedia applications can be supported in on-chip interconnects. 
Driven by the motivation of evaluating on-chip interconnects in multimedia embedded systems, a new analytical model is proposed to investigate the performance of the fat-tree based on-chip interconnection network under bursty multimedia traffic and nonuniform message destinations. Extensive simulation experiments are conducted to validate the accuracy of the model, which is then adopted as a cost-efficient tool to investigate the effects of bursty multimedia traffic with nonuniform destinations on the network performance.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "29", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Che:2013:SSD, author = "Weijia Che and Karam S. Chatha", title = "Scheduling of synchronous data flow models onto scratchpad memory-based embedded processors", journal = j-TECS, volume = "13", number = "1s", pages = "30:1--30:??", month = nov, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2536747.2536752", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Dec 9 11:30:05 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this article, we propose a heuristic algorithm for scheduling synchronous data flow (SDF) models on scratch pad memory (SPM) enhanced processors with the objective of minimizing its steady-state execution time. The task involves partitioning the limited on-chip SPM for actor code and data buffer, and executing actors in such a manner that the physical SPM is time shared with different actors and buffers (formally defined as code overlay and data overlay, respectively). In our setup, a traditional minimum buffer schedule could result in very high code overlay overhead and therefore may not be optimal. To reduce the number of direct memory access (DMA) transfers, actors need to be grouped into segments. 
Prefetching of code and data overlay that overlaps DMA transfers with actor executions also needs to be exploited. The efficiency of our heuristic was evaluated by compiling ten stream applications onto one synergistic processing engine (SPE) of an IBM Cell Broadband Engine. We compare the performance results of our heuristic approach with a minimum buffer scheduling approach and a 3-stage ILP approach, and show that our heuristic is able to generate high quality solutions with fast algorithm run time.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "30", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Schmoll:2013:IFR, author = "Florian Schmoll and Andreas Heinig and Peter Marwedel and Michael Engel", title = "Improving the fault resilience of an {H.264} decoder using static analysis methods", journal = j-TECS, volume = "13", number = "1s", pages = "31:1--31:??", month = nov, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2536747.2536753", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Dec 9 11:30:05 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Fault tolerance rapidly evolves into one of the most significant design objectives for embedded systems due to reduced semiconductor structures and supply voltages. However, resource-constrained systems cannot afford traditional error correction for overhead and cost reasons. New methods are required to sustain acceptable service quality in case of errors while avoiding crashes. We present a flexible fault-tolerance approach that is able to select correction actions depending on error semantics using application annotations and static analysis approaches.
We verify the validity of our approach by analyzing the vulnerability and improving the reliability of an H.264 decoder using flexible error handling.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "31", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Parmer:2013:PCC, author = "Gabriel Parmer and Richard West", title = "Predictable and configurable component-based scheduling in the {Composite OS}", journal = j-TECS, volume = "13", number = "1s", pages = "32:1--32:??", month = nov, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2536747.2536754", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Dec 9 11:30:05 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article presents the design of user-level scheduling hierarchies in the Composite component-based system. The motivation for this is centered around the design of a system that is both dependable and predictable, and which is configurable to the needs of specific applications. Untrusted application developers can safely develop services and policies, that are isolated in protection domains outside the kernel. To ensure predictability, Composite enforces timing control over user-space services. Moreover, it must provide a means by which asynchronous events, such as interrupts, are handled in a timely manner without jeopardizing the system. Towards this end, we describe the features of Composite that allow user-defined scheduling policies to be composed for the purposes of combined interrupt and task management. A significant challenge arises from the need to synchronize access to shared data structures (e.g., scheduling queues), without allowing untrusted code to disable interrupts. 
Additionally, efficient upcall mechanisms are needed to deliver asynchronous event notifications in accordance with policy-specific priorities, without undue recourse to schedulers. We show how these issues are addressed in Composite, by comparing several hierarchies of scheduling policies, to manage both tasks and the interrupts on which they depend. Studies show how it is possible to implement guaranteed differentiated services as part of the handling of I/O requests from a network device while diminishing livelock. Microbenchmarks indicate that the costs of implementing and invoking user-level schedulers in Composite are on par with, or less than, those in other systems, with thread switches more than twice as fast as in Linux.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "32", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhou:2013:ARD, author = "Bo Zhou and Xiaobo Sharon Hu and Danny Z. Chen and Cedric X. Yu", title = "Accelerating radiation dose calculation: a multi-{FPGA} solution", journal = j-TECS, volume = "13", number = "1s", pages = "33:1--33:??", month = nov, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2536747.2536755", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Dec 9 11:30:05 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Remarkable progress has been made in the past few decades in various aspects of radiation therapy (RT). However, some of these promising technologies, such as image-guided online replanning and arc therapy, rely heavily on the availability of fast dose calculation. In this article, based on a popular dose calculation algorithm, the Collapsed-Cone Convolution/Superposition (CCCS) algorithm, we present a multi-FPGA accelerator to speed up radiation dose calculation.
Our performance-driven design strategy yields a fully pipelined architecture, which includes a resource-economic raytracing engine and high-performance energy deposition pipeline. An evaluation based on a set of clinical treatment planning cases confirms that our FPGA design almost fully utilizes the available external memory bandwidth and achieves close to the best possible performance for the CCCS algorithm while using less resource. Compared with an existing FPGA design which aimed to accelerate the identical algorithm, the proposed design achieved 1.9X speedup by providing better memory bandwidth utilization (81.7\% vs. 43\% of the available external memory bandwidth), higher working frequency (90MHz vs. 70MHz) and less logic resource usage (25K vs. 55K logic cells). Furthermore, it obtains a speedup of 20X over a commercial multithreaded software on a quad-core system and 15X performance improvement over closely related results. In terms of accuracy, the measured less than 1\% statistical fluctuation indicates that our solution is practical in real medical scenarios.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "33", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Furtado:2013:CON, author = "Pedro Furtado and Jos{\'e} Cec{\'\i}lio", title = "Configuration and operation of networked control systems over heterogeneous {WSANs}", journal = j-TECS, volume = "13", number = "1s", pages = "34:1--34:??", month = nov, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2536747.2536756", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Dec 9 11:30:05 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "There have been both research and commercial advances on applying Wireless Sensor and Actuator Networks (WSN) in industrial premises.
These have cost advantages related to avoiding some cabled deployments. A possible architecture involves a Networked Control System (NCS) with many small WSN subnetworks, cabled nodes and computer servers (e.g., servers, control stations). In those systems individual sensor nodes can be programmed, as opposed to cabled analog systems. We investigate approaches for network-wide configuration, where all nodes---cabled or WSN sensors---can be configured with simplicity from a single interface, instead of hand-coding or complex configurations of individual nodes. We propose an architecture and approach for configuration and operation. Previous related proposals on middleware involving WSNs suffer from two major limitations: they either program within an individual WSN or configure operation outside WSNs, wrapping data coming from WSN. They do not allow configuring WSN and non-WSN nodes for operation from a single interface. We discuss the architecture and propose the NCSWSN configuration and operation approach. We are applying this system in an industrial testbed, therefore we test the approach and also show user interfaces and results from the deployment.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "34", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Sanz:2013:SLM, author = "Concepci{\'o}n Sanz and Jos{\'e} Ignacio G{\'o}mez and Christian Tenllado and Manuel Prieto and Francky Catthoor", title = "System-level memory management based on statistical variability compensation for frame-based applications", journal = j-TECS, volume = "13", number = "1s", pages = "35:1--35:??", month = nov, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2536747.2536757", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Dec 9 11:30:05 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Process variability and dynamic domains increase the uncertainty of embedded systems and force designers to apply pessimistic designs, which become unnecessarily conservative and have a tremendous impact on both performance and energy consumption. In this context, developing uncertainty-aware design methodologies that take both variation at platform and at application level into account becomes a must. These methodologies should mitigate the effects derived from uncertainty, avoiding worst-case assumptions. In this article we propose a comprehensive methodology to tackle two forms of uncertainty: (1) process variation on the memory system, (2) application dynamism. A statistical model has been developed to deal with variability derived from fabrication process, whereas system scenarios are selected to cope with dynamic domains. Both sources of uncertainty are firstly tackled in combination at design time, to be refined later, at setup. As a result, at run time the platform can be successfully adapted to the current application behaviour as well as the current variations. 
Our simulations show that this methodology provides significant energy savings while still meeting strict timing constraints.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "35", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mohaqeqi:2013:ASR, author = "Morteza Mohaqeqi and Mehdi Kargahi and Maryam Dehghan", title = "Adaptive scheduling of real-time systems cosupplied by renewable and nonrenewable energy sources", journal = j-TECS, volume = "13", number = "1s", pages = "36:1--36:??", month = nov, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2536747.2536758", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Dec 9 11:30:05 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Energy management is an important issue in today's real-time systems due to the high costs of energy supplying. Using renewable, like wave, wind, and solar energy sources seem promising methods to address this issue. However, because of the existing contrast between the critical nature of hard real-time systems and the unpredictable nature of renewable energies, some supplementary energy source like electricity grid or battery is needed. In this paper, we consider hard real-time systems with two renewable and nonrenewable energy sources. In order to reduce the costs, we present two dynamic voltage scaling controllers to minimize the energy attained from the latter source. In order to handle variations of the environmental energy and workload, the model predictive control approach is employed. One nonlinear approach beside one fast linear piecewise affine explicit controller are proposed. The efficacies of the proposed approaches have been investigated through extensive simulations. 
Comparisons to an ideal clairvoyant controller as a baseline show that, in the studied scenarios, the proposed controllers guarantee at least 78\% of the baseline performance.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "36", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lo:2013:AGH, author = "Chen-Kang Lo and Mao-Lin Li and Li-Chun Chen and Yi-Shan Lu and Ren-Song Tsay and Hsu-Yao Huang and Jen-Chieh Yeh", title = "Automatic generation of high-speed accurate {TLM} models for out-of-order pipelined bus", journal = j-TECS, volume = "13", number = "1s", pages = "37:1--37:??", month = nov, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2536747.2536759", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Dec 9 11:30:05 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Although pipelined/out-of-order (PL/OO) execution features are commonly supported by the state-of-the-art bus designs, no existing manual Transaction-Level-Modeling (TLM) approaches can effectively construct fast and accurate simulation models for PL/OO buses. Mainly, the inherent high design complexity of concurrent PL/OO behaviors makes the manual approaches tedious and error-prone. To tackle the complicated modeling task, this article presents an automatic approach that performs systematic abstraction and generation of fast-and-accurate simulation models. The experimental results show that our approach reduces 21 times modeling efforts, while our generated models perform simulation an order of magnitude faster than Cycle-Accurate models with the same PL/OO transaction execution cycle counts preserved.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "37", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lee:2013:SBR, author = "Jongeun Lee and Aviral Shrivastava", title = "Software-based register file vulnerability reduction for embedded processors", journal = j-TECS, volume = "13", number = "1s", pages = "38:1--38:??", month = nov, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2536747.2536760", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Dec 9 11:30:05 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Register File (RF) is extremely vulnerable to soft errors, and traditional redundancy based schemes to protect the RF are prohibitive not only because RF is often in the timing critical path of the processor, but also since it is one of the hottest blocks on the chip. Software approaches would be ideal in this case, but previous approaches based on instruction scheduling are only moderately effective due to local scope. In this article we present a compiler approach, based on interprocedural program analysis, to reduce the vulnerability of registers by temporarily writing live variables to protected memory. We formulate the problem as an integer linear programming problem and also present a very efficient heuristic algorithm. Further we present an iterative optimization method based on Kernighan--Lin's graph partitioning algorithm. Our experiments demonstrate that our proposed techniques can reduce the vulnerability of a RF by 33 to 37\% on average and up to 66\%, with a small 2\% increase in runtime. In addition, our overhead reduction optimization can effectively reduce the code size overhead, by more than 40\% on average, to a mere 5 to 6\%, compared to highly optimized binaries.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "38", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Singh:2013:MCN, author = "Anshul Singh and Arindam Basu and Keck-Voon Ling and Vincent J. {Mooney III}", title = "Models for characterizing noise based {PCMOS} circuits", journal = j-TECS, volume = "13", number = "1s", pages = "39:1--39:??", month = nov, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2536747.2536761", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Dec 9 11:30:05 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Quick and accurate error-rate prediction of Probabilistic CMOS (PCMOS) circuits is crucial for their systematic design and performance evaluation. While still in the early stage of research, PCMOS has shown potential to drastically reduce energy consumption at a cost of increased errors. Recently, a methodology has been proposed which could predict the error rates of cascade structures of blocks in PCMOS. This methodology requires error rates of unique blocks to predict the error rates of multiblock cascade structures composed of these unique blocks. In this article we present a new model for characterization of probabilistic circuits/blocks and present a procedure to find and characterize unique circuits/blocks. Unlike prior approaches, our new model distinguishes distinct filtering effects per output, thereby improving prediction accuracy by an average of 95\% over the prior art by Palem and coauthors. Furthermore, we show two models where our new model with three stages is 18\% more accurate, on average, than our simpler two-stage model. 
We apply our proposed models to Ripple Carry Adders and Wallace Tree Multipliers and show that using our models, the methodology of cascade structures can predict error rates of PCMOS circuits with reasonable accuracy (within 9\%) in PCMOS for uniform voltages as well as multiple voltages. Finally, our approach takes seconds of simulation time whereas using HSPICE would take days of simulation time.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "39", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Anagnostopoulos:2013:PAD, author = "Iraklis Anagnostopoulos and Jean-Michel Chabloz and Ioannis Koutras and Alexandros Bartzas and Ahmed Hemani and Dimitrios Soudris", title = "Power-aware dynamic memory management on many-core platforms utilizing {DVFS}", journal = j-TECS, volume = "13", number = "1s", pages = "40:1--40:??", month = nov, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2536747.2536762", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Dec 9 11:30:05 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Today multicore platforms are already prevalent solutions for modern embedded systems. In the future, embedded platforms will have an even more increased processor core count, composing many-core platforms. In addition, applications are becoming more complex and dynamic and try to efficiently utilize the amount of available resources on the embedded platforms. Efficient memory utilization is a key challenge for application developers, especially since memory is a scarce resource and often becomes the system's bottleneck. 
To cope with this dynamism and achieve better memory footprint utilization (low memory fragmentation) application developers resort to the usage of dynamic memory (heap) management techniques, by allocating and deallocating data at runtime. Moreover, overall power consumption is another key challenge that needs to be taken into consideration. Towards this, designers employ the usage of Dynamic Voltage and Frequency Scaling (DVFS) mechanisms, adapting to the application's computational demands at runtime. In this article, we propose the combination of dynamic memory management techniques with DVFS ones. This is performed by integrating, within the memory manager, runtime monitoring mechanisms that steer the DVFS mechanisms to adjust clock frequency and voltage supply based on heap performance. The proposed approach has been evaluated on a distributed shared-memory many-core platform composed of multiple LEON3 processors interconnected by a Network-on-Chip infrastructure, supporting DVFS. Experimental results show that by using the proposed method for monitoring and applying DVFS mechanisms the power consumption concerning dynamic memory management was reduced by approximately 37\%. In addition we present the trade-offs of the proposed approach. Last, by combining the developed method with heap fragmentation-aware dynamic memory managers, we achieve low heap fragmentation values combined with low power consumption.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "40", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Editors:2014:MMA, author = "Yongcai Wang and Xiaohong Hao and Lei Song and Chenye Wu and Yuexuan Wang and Changjun Hu and Lu Yu", title = "Monitoring massive appliances by a minimal number of smart meters", journal = j-TECS, volume = "13", number = "2s", pages = "56:1--56:??", month = jan, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2544375.2544376", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 28 17:34:43 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article presents a framework for deploying a minimal number of smart meters to accurately track the ON/OFF states of a massive number of electrical appliances which exploits the sparseness feature of simultaneous ON/OFF switching events of the massive appliances. A theoretical bound on the least number of required smart meters is studied by an entropy-based approach, which qualifies the impact of meter deployment strategies to the state tracking accuracy. It motivates a meter deployment optimization algorithm (MDOP) to minimize the number of meters while satisfying given requirements to state tracking accuracy. To accurately decode the real-time ON/OFF states of appliances by the readings of meters, a fast state decoding (FSD) algorithm based on the hidden Markov model (HMM) is presented to track the state sequence of each appliance for better accuracy. Although traditional HMM needs $ O(t 2^{2 N}) $ time complexity to conduct online sequence decoding, FSD improves the complexity to $ O (t n^{U + 1}) $, where n {$<$} N and U is an upper bound of the simultaneous switching events. Both MDOP and FSD are verified extensively using simulations and real PowerNet data.
The results show that the meter deployment cost can be saved by more than 80\% while still getting over 90\% state tracking accuracy.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "56", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wu:2014:EDF, author = "Chenye Wu and Yiyu Shi and Soummya Kar", title = "Exploring demand flexibility in heterogeneous aggregators: an {LMP}-based pricing scheme", journal = j-TECS, volume = "13", number = "2s", pages = "57:1--57:??", month = jan, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2544375.2544377", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 28 17:34:43 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "With the proposed penetration of electric vehicles and advanced metering technology, the demand side is foreseen to play a major role in flexible energy consumption scheduling. On the other hand, the past several years have witnessed utility companies' growing interests to integrate more renewable energy resources. These renewable resources, for example, wind or solar, due to their intermittent nature, brought great uncertainty to the power grid system. In this article, we propose a mechanism that attempts to mitigate the grid operational uncertainty induced by renewable energies by properly exploiting demand flexibility with the help of advanced smart-metering technology. To address the challenge, we develop a novel locational marginal price (LMP)-based pricing scheme that involves active demand-side participation by casting the network objective as a two-stage Stackelberg game between the local grid operator and several aggregators. 
In contrast to the conventional notion that generation follows load, our game formulation provides more flexibility for the operators and tries to provide adequate incentives for the loads to follow the (stochastic renewable) generation. We use the solution concept of subgame perfect equilibrium to analyze the resulting game. Subsequently, we discuss the optimal real-time conventional capacity planning for the local grid operator to achieve the minimal mismatch between supply and demand with the wind power integration. Finally, we assess our proposed scheme with field data. The simulation results show that our proposed scheme works reasonably well in the long term, even with simple heuristics.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "57", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chang:2014:ISS, author = "Naehyuck Chang and Jian-Jia Chen", title = "Introduction to the special section on {ESTIMedia'11}", journal = j-TECS, volume = "13", number = "2s", pages = "58:1--58:??", month = jan, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2544375.2544378", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 28 17:34:43 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "58", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Su:2014:RVP, author = "Tzu-Hsiang Su and Hsiang-Jen Tsai and Keng-Hao Yang and Po-Chun Chang and Tien-Fu Chen and Yi-Ting Zhao", title = "Reconfigurable vertical profiling framework for the {Android} runtime system", journal = j-TECS, volume = "13", number = "2s", pages = "59:1--59:??", month = jan, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2544375.2544379", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 28 17:34:43 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Dalvik virtual machine in the Android system creates a profiling barrier between VM-space applications and Linux user-space libraries. It is difficult for existing profiling tools on the Android system to definitively identify whether a bottleneck occurred in the application level, the Linux user-space level, or the Linux kernel level. Information barriers exist between VM-space applications and Linux native analysis tools due to runtime virtual machines' dynamic memory allocation mechanism. Furthermore, traditional vertical profiling tools targeted for Java virtual machines cannot be simply applied on the Dalvik virtual machine due to its unique design. The proposed Reconfigurable Vertical Profiling Framework bridges the information gap and streamlines the hardware-software co-design process for the Android runtime system.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "59", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Song:2014:POA, author = "Wook Song and Yeseong Kim and Hakbong Kim and Jehun Lim and Jihong Kim", title = "Personalized optimization for {Android} smartphones", journal = j-TECS, volume = "13", number = "2s", pages = "60:1--60:??", month = jan, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2544375.2544380", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 28 17:34:43 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "As a highly personalized computing device, smartphones present a unique new opportunity for system optimization. For example, it is widely observed that a smartphone user exhibits very regular application usage patterns (although different users are quite different in their usage patterns). User-specific high-level app usage information, when properly managed, can provide valuable hints for optimizing various system design requirements. In this article, we describe the design and implementation of a personalized optimization framework for the Android platform that takes advantage of user's application usage patterns in optimizing the performance of the Android platform. Our optimization framework consists of two main components, the application usage modeling module and the usage model-based optimization module. We have developed two novel application usage models that correctly capture typical smartphone user's application usage patterns. Based on the application usage models, we have implemented an app-launching experience optimization technique which tries to minimize user-perceived delays, extra energy consumption, and state loss when a user launches apps. 
Our experimental results on the Nexus S Android reference phones show that our proposed optimization technique can avoid unnecessary application restarts by up to 78.4\% over the default LRU-based policy of the Android platform.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "60", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mirzoyan:2014:PVA, author = "Davit Mirzoyan and Benny Akesson and Kees Goossens", title = "Process-variation-aware mapping of best-effort and real-time streaming applications to {MPSoCs}", journal = j-TECS, volume = "13", number = "2s", pages = "61:1--61:??", month = jan, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2490819", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 28 17:34:43 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "As technology scales, the impact of process variation on the maximum supported frequency (FMAX) of individual cores in a multiprocessor system-on-chip (MPSoC) becomes more pronounced. Task allocation without variation-aware performance analysis can greatly compromise performance and lead to a significant loss in yield, defined as the percentage of manufactured chips satisfying the application timing requirement. We propose variation-aware task allocation for best-effort and real-time streaming applications modeled as task graphs. Our solutions are primarily based on the throughput requirement, which is the most important timing requirement in many real-time streaming applications. 
The four main contributions of this work are (1) distinguishing best-effort, firm real-time, and soft real-time application classes, which require different optimization criteria, (2) using dataflow graphs, which are well suited for modeling and analysis of streaming applications, we explicitly model task execution both in terms of clock cycles (which is independent of variation) and seconds (which does depend on the variation of the resource), which we connect by an explicit binding, (3) we present two optimization approaches, which give different improvement results at different costs, (4) we present both exhaustive and heuristic algorithms that implement the optimization approaches. Our variation-aware mapping algorithms are tested on models of seven real applications and are compared to mapping methods that are unaware of hardware variation. Our results demonstrate (1) improvements in the average performance (3\% on average) for best-effort applications, and (2) for firm real-time and soft real-time applications, yield improvements of up to 27\% with an average of 15\%, showing the effectiveness of our approaches.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "61", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Jung:2014:HCO, author = "Dong-Heon Jung and Soo-Mook Moon and Hyeong-Seok Oh", title = "Hybrid compilation and optimization for {Java}-based digital {TV} platforms", journal = j-TECS, volume = "13", number = "2s", pages = "62:1--62:??", month = jan, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2506257", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 28 17:34:43 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The Java-based software platform for interactive digital TV (DTV) is composed of the system/middleware class statically installed on the DTV set-top box and the xlet applications dynamically downloaded from the TV stations. The xlet application includes Java classes and image/text files. The xlets are executed only when the TV viewer initiates an interaction, even if the xlets have been completely downloaded. To achieve high performance on this dual-component, user-initiated system, existing just-in-time (JIT) compilation and optimization is not enough; instead, ahead-of-time and idle-time compilation and optimization are also needed, requiring a hybrid compilation and optimization environment. We constructed such a hybrid environment for a commercial DTV software platform and evaluated it using real, on-air xlet applications. Our experimental results show that the proposed hybrid environment can improve the DTV Java performance by more than three times, compared to the JIT-only environment, with little change to other DTV behavior.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "62", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chang:2014:RAC, author = "Li-Pin Chang and Chen-Yi Wen", title = "Reducing asynchrony in channel garbage-collection for improving internal parallelism of multichannel solid-state disks", journal = j-TECS, volume = "13", number = "2s", pages = "63:1--63:??", month = jan, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2544375.2544383", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 28 17:34:43 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Solid-state disks use multichannel architectures to boost their data transfer rates. Because realistic disk workloads have numerous small write requests, modern flash-storage devices adopt a write buffer and a set of independent channels for better parallelism in serving small write requests. When a channel is undergoing garbage collection, it stops responding to inbound write traffic and accumulates page data in the write buffer. This results in contention for buffer space and creates idle periods in channels. This study presents a channel-management strategy, called garbage-collection advancing, which allows early start of garbage collection in channels for increasing the overlap among channel activities of garbage collection and restoring the balance of buffer-space usage among channels. This study further introduces cycle filling, which is a version of garbage-collection advancing tailored for the operation model of flash planes. Experimental results show that the proposed methods greatly outperformed existing designs of multichannel systems in terms of response and throughput. We also successfully implemented the proposed methods in a real solid-state disk and proved their feasibility in real hardware.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. 
Comput. Syst.", articleno = "63", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Li:2014:MRT, author = "Zheng Li and Frank Lockom and Shangping Ren", title = "Maintaining real-time application timing similarity for defect-tolerant {NoC}-based many-core systems", journal = j-TECS, volume = "13", number = "2s", pages = "64:1--64:??", month = jan, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2544375.2544384", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 28 17:34:43 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Many-core Network-on-Chip (NoC) processors are emerging in broad application areas, including those with timing requirements, such as real-time and multimedia applications. Typically, these processors employ core-level backup to improve yield. However, when defective cores are replaced by backup ones, the NoC topology changes. Consequently, a fine-tuned application based on timing parameters given by one topology may not meet the expected timing behavior under the new one. We first develop a metric to measure timing similarity of an application on different NoC topologies and then propose mixed binary quadratic programming and greedy algorithms to reconfigure a defect-tolerant many-core NoC.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "64", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ahmed:2014:TSA, author = "Masud Ahmed and Nathan Fisher", title = "Tractable schedulability analysis and resource allocation for real-time multimodal systems", journal = j-TECS, volume = "13", number = "2s", pages = "65:1--65:??", month = jan, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2544375.2544385", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 28 17:34:43 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Real-time multimedia subsystems often require support for switching between different resource and application execution modes. To ensure that timing constraints are not violated during or after a subsystem mode change, real-time schedulability analysis is required. However, existing time-efficient multimode schedulability analysis techniques for application-only mode changes are not appropriate for subsystems that require changes in the resource execution behavior (e.g., processors with dynamic power modes). Furthermore, all existing multimode schedulability analysis that handles both resource and application mode changes is highly exponential and not scalable for subsystems with a moderate or large number of modes. As a result, the notion of resource optimality is still unaddressed for real-time multimodal systems. In this report, we first address the lack of tractable schedulability analysis for such subsystems by proposing a model for characterizing multiple resource and application modes and by deriving a sufficient schedulability test that has pseudo-polynomial time complexity. Finally, we propose an algorithm which leverages this pseudo-polynomial schedulability analysis to optimize the resource usages (e.g., to minimize peak-power load) of a multimodal real-time system. 
Simulation results show that our proposed algorithms for schedulability analysis and resource allocation, when compared with previously-proposed approaches, require significantly less time and are just as precise.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "65", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Balani:2014:DPF, author = "Rahul Balani and Lucas F. Wanner and Mani B. Srivastava", title = "Distributed programming framework for fast iterative optimization in networked cyber-physical systems", journal = j-TECS, volume = "13", number = "2s", pages = "66:1--66:??", month = jan, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2544375.2544386", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 28 17:34:43 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Large-scale coordination and control problems in cyber-physical systems are often expressed within the networked optimization model. While significant advances have taken place in optimization techniques, their widespread adoption in practical implementations has been impeded by the complexity of internode coordination and lack of programming support for the same. Currently, application developers build their own elaborate coordination mechanisms for synchronized execution and coherent access to shared resources via distributed and concurrent controller processes. However, they typically tend to be error prone and inefficient due to tight constraints on application development time and cost. This is unacceptable in many CPS applications, as it can result in expensive and often irreversible side-effects in the environment due to inaccurate or delayed reaction of the control system. 
This article explores the design of a distributed shared memory (DSM) architecture that abstracts the details of internode coordination. It simplifies application design by transparently managing routing, messaging, and discovery of nodes for coherent access to shared resources. Our key contribution is the design of provably correct locality-sensitive synchronization mechanisms that exploit the spatial locality inherent in actuation to drive faster and scalable application execution through opportunistic data parallel operation. As a result, applications encoded in the proposed Hotline Application Programming Framework are error free, and in many scenarios, exhibit faster reactions to environmental events over conventional implementations. Relative to our prior work, this article extends Hotline with a new locality-sensitive coordination mechanism for improved reaction times and two tunable iteration control schemes for lower message costs. Our extensive evaluation demonstrates that realistic performance and cost of applications are highly sensitive to the prevalent deployment, network, and environmental characteristics. This highlights the importance of Hotline, which provides user-configurable options to trivially tune these metrics and thus affords time to the developers for implementing, evaluating, and comparing multiple algorithms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "66", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Brandt:2014:PCS, author = "Jens Brandt and Klaus Schneider and Yu Bai", title = "Passive code in synchronous programs", journal = j-TECS, volume = "13", number = "2s", pages = "67:1--67:??", month = jan, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2544375.2544387", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 28 17:34:43 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The synchronous model of computation requires that in every step, inputs are read and outputs are synchronously computed as the reaction of the program. In addition, all internal variables are updated in parallel even though not all of these values might be required for the current and the future reaction steps. To avoid unnecessary computations, we present a compile-time optimization procedure that computes for every variable a condition that determines whether its value is required for current or future computations. In this sense, our optimizations allow us to identify passive code that can be disabled to avoid unnecessary computations and therefore to reduce the reaction time of programs or their energy consumption.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "67", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Gu:2014:AES, author = "Yu Gu and Liang He and Ting Zhu and Tian He", title = "Achieving energy-synchronized communication in energy-harvesting wireless sensor networks", journal = j-TECS, volume = "13", number = "2s", pages = "68:1--68:??", month = jan, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2544375.2544388", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 28 17:34:43 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "With advances in energy-harvesting techniques, it is now feasible to build sustainable sensor networks to support long-term applications. Unlike battery-powered sensor networks, the objective of sustainable sensor networks is to effectively utilize a continuous stream of ambient energy. Instead of pushing the limits of energy conservation, we aim to design energy-synchronized schemes that keep energy supplies and demands in balance. Specifically, this work presents Energy-Synchronized Communication (ESC) as a transparent middleware between the network layer and MAC layer that controls the amount and timing of RF activity at receiving nodes. In this work, we first derive a delay model for cross-traffic at individual nodes, which reveals an interesting stair effect. This effect allows us to design a localized energy synchronization control with $ O(d^3) $ time complexity that shuffles or adjusts the working schedule of a node to optimize cross-traffic delays in the presence of changing duty cycle budgets, where $d$ is the node degree in the network. 
Under different rates of energy fluctuations, shuffle-based and adjustment-based methods have different influences on logical connectivity and cross-traffic delay, due to the inconsistent views of working schedules among neighboring nodes before schedule updates. We study the trade-off between them and propose methods for updating working schedules efficiently. To evaluate our work, ESC is implemented on MicaZ nodes with two state-of-the-art routing protocols. Both testbed experiment and large-scale simulation results show significant performance improvements over randomized synchronization controls.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "68", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lee:2014:CFE, author = "Jinkyu Lee and Arvind Easwaran and Insik Shin", title = "Contention-free executions for real-time multiprocessor scheduling", journal = j-TECS, volume = "13", number = "2s", pages = "69:1--69:??", month = jan, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2494530", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 28 17:34:43 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "A time slot is defined as contention-free if the number of jobs with remaining executions in the slot is no larger than the number of processors, or contending, otherwise. Then an important property holds that in any contention-free slot, all jobs with remaining executions are guaranteed to be scheduled as long as the scheduler is work-conserving. This article aims at improving schedulability by utilizing the contention-free slots. To achieve this, this article presents a policy (called CF policy) that moves some job executions from contending slots to contention-free ones. 
This policy can be employed by any work-conserving, preemptive scheduling algorithm, and we show that any algorithm extended with this policy dominates the original algorithm in terms of schedulability. We also present improved schedulability tests for algorithms that employ this policy, based on the observation that interference from jobs is reduced when their executions are postponed to contention-free slots. Simulation results demonstrate that the CF policy, incorporated into existing algorithms, significantly improves schedulability of those existing algorithms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "69", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Huang:2014:TMP, author = "Huang Huang and Vivek Chaturvedi and Gang Quan and Jeffrey Fan and Meikang Qiu", title = "Throughput maximization for periodic real-time systems under the maximal temperature constraint", journal = j-TECS, volume = "13", number = "2s", pages = "70:1--70:??", month = jan, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2544375.2544390", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 28 17:34:43 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this article, we study the problem of how to maximize the throughput of a periodic real-time system under a given peak temperature constraint. We assume that different tasks in our system may have different power and thermal characteristics. Two scheduling approaches are presented. The first is built upon processors that can be in either active or sleep mode. 
By judiciously selecting tasks with different thermal characteristics as well as alternating the processor's active/sleep mode, the sleep period required to cool down the processor is kept at a minimum level, and, as a result, the throughput is maximized. We further extend this approach for processors with dynamic voltage/frequency scaling (DVFS) capability. Our experiments on a large number of synthetic test cases as well as real benchmark programs show that the proposed methods not only consistently outperform the existing approaches in terms of throughput maximization, but also significantly improve the feasibility of tasks when a more stringent temperature constraint is imposed.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "70", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Elewi:2014:EET, author = "Abdullah Elewi and Mohamed Shalan and Medhat Awadalla and Elsayed M. Saad", title = "Energy-efficient task allocation techniques for asymmetric multiprocessor embedded systems", journal = j-TECS, volume = "13", number = "2s", pages = "71:1--71:??", month = jan, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2544375.2544391", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 28 17:34:43 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Asymmetric multiprocessor systems are considered power-efficient multiprocessor architectures. Furthermore, efficient task allocation (partitioning) can achieve more energy efficiency at these asymmetric multiprocessor platforms. This article addresses the problem of energy-aware static partitioning of periodic real-time tasks on asymmetric multiprocessor (multicore) embedded systems. 
The article formulates the problem according to the Dynamic Voltage and Frequency Scaling (DVFS) model supported by the platform and shows that it is an NP-hard problem. Then, the article outlines optimal reference partitioning techniques for each case of DVFS model with suitable assumptions. Finally, the article proposes modifications to the traditional bin-packing techniques and designs novel techniques taking into account the DVFS model supported by the platform. All algorithms and techniques are simulated and compared. The simulation shows promising results, where the proposed techniques reduced the energy consumption by 75\% compared to traditional methods when DVFS is not supported and by 50\% when per-core DVFS is supported by the platform.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "71", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Das:2014:EAT, author = "Anup Das and Akash Kumar and Bharadwaj Veeravalli", title = "Energy-aware task mapping and scheduling for reliable embedded computing systems", journal = j-TECS, volume = "13", number = "2s", pages = "72:1--72:??", month = jan, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2544375.2544392", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 28 17:34:43 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Task mapping and scheduling are critical in minimizing energy consumption while satisfying the performance requirement of applications enabled on heterogeneous multiprocessor systems. An area of growing concern for modern multiprocessor systems is the increase in the failure probability of one or more component processors. This is especially critical for applications where performance degradation (e.g., throughput) directly impacts the quality of service requirement. 
This article proposes a design-time (offline) multi-criterion optimization technique for application mapping on embedded multiprocessor systems to minimize energy consumption for all processor fault-scenarios. A scheduling technique is then proposed based on self-timed execution to minimize the schedule storage and construction overhead at runtime. Experiments conducted with synthetic and real applications from streaming and nonstreaming domains on heterogeneous MPSoCs demonstrate that the proposed technique minimizes energy consumption by 22\% and design space exploration time by $ 100 \times $, while satisfying the throughput requirement for all processor fault-scenarios. For scalable throughput applications, the proposed technique achieves 30\% better throughput per unit energy, compared to the existing techniques. Additionally, the self-timed execution-based scheduling technique minimizes schedule construction time by 95\% and storage overhead by 92\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "72", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wang:2014:STN, author = "Xiaohang Wang and Mei Yang and Yingtao Jiang and Peng Liu and Masoud Daneshtalab and Maurizio Palesi and Terrence Mak", title = "On self-tuning networks-on-chip for dynamic network-flow dominance adaptation", journal = j-TECS, volume = "13", number = "2s", pages = "73:1--73:??", month = jan, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2544375.2544393", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 28 17:34:43 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Modern network-on-chip (NoC) systems are required to handle complex runtime traffic patterns and unprecedented applications. 
Data traffics of these applications are difficult to fully comprehend at design time so as to optimize the network design. However, it has been discovered that the majority of dataflows in a network are dominated by less than 10\% of the specific pathways. In this article, we introduce a method that is capable of identifying critical pathways in a network at runtime and can then dynamically reconfigure the network to optimize for network performance subject to the identified dominated flows. An online learning and analysis scheme is employed to quickly discover the emerging dominated traffic flows and provides a statistical traffic prediction using regression analysis. The architecture of a self-tuning network is also discussed which can be reconfigured by setting up the identified point-to-point paths for the dominance dataflows in large traffic volumes. The merits of this new approach are experimentally demonstrated using comprehensive NoC simulations. Compared to the conventional network architectures over a range of realistic applications, the proposed self-tuning network approach can effectively reduce the latency and power consumption by as much as 25\% and 24\%, respectively. We also evaluated the configuration time and additional hardware cost. This new approach demonstrates the capability of an adaptive NoC to handle more complex and dynamic applications.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "73", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Bournoutian:2013:AAA, author = "Garo Bournoutian and Alex Orailoglu", title = "Application-aware adaptive cache architecture for power-sensitive mobile processors", journal = j-TECS, volume = "13", number = "3", pages = "41:1--41:??", month = dec, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2539036.2539037", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Dec 18 19:07:39 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Today, mobile smartphones are expected to be able to run the same complex, algorithm-heavy, memory-intensive applications that were originally designed and coded for general-purpose processors. All the while, it is also expected that these mobile processors be power-conscientious as well as of minimal area impact. These devices pose unique usage demands of ultra-portability but also demand an always-on, continuous data access paradigm. As a result, this dichotomy of continuous execution versus long battery life poses a difficult challenge. This article explores a novel approach to mitigating mobile processor power consumption while abating any significant degradation in execution speed. The concept relies on efficiently leveraging both compile-time and runtime application memory behavior to intelligently target adjustments in the cache to significantly reduce overall processor power, taking into account both the dynamic and leakage power footprint of the cache subsystem. The simulation results show a significant reduction in power consumption of approximately 13\% to 29\%, while only incurring a nominal increase in execution time and area.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "41", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhou:2013:GOV, author = "Bo Zhou and Kai Xiao and Danny Z. Chen and X. Sharon Hu", title = "{GPU}-optimized volume ray tracing for massive numbers of rays in radiotherapy", journal = j-TECS, volume = "13", number = "3", pages = "42:1--42:??", month = dec, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2539036.2539038", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Dec 18 19:07:39 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Ray tracing within a uniform grid volume is a fundamental process invoked frequently by many applications, especially radiation-dose calculation methods in radiotherapy. However, the conflicting features between the GPU memory architecture and the memory-accessing patterns of volume ray tracing lead to inefficient usage of GPU memory bandwidth and waste of capability of modern GPUs. To improve the ray tracing performance on GPU, we propose a lookup-table-based ray tracing method which is specially optimized towards the GPU memory system for processing a massive number of rays. The proposed method is based on a key observation that many of these applications normally involve a massive number of rays, but their ray tracing may not need to follow a specific execution order. Therefore, we divide the 3D space into many regions (called pyramids) and group together the rays falling into the same pyramid. For each ray group, the volume is rotated and resampled for their raytracing. This divide-and-rotate strategy allows the memory access of the ray tracing process to adopt a table-lookup approach and leads to better memory coalescing on GPU. Our proposed method was thoroughly evaluated in four volume setups with randomly-generated rays. 
The collapsed-cone convolution/superposition (CCCS) dose calculation method is also implemented with/without the proposed approach to verify the feasibility of our method. Compared with the direct GPU implementation of the popular 3DDDA algorithm, our method provides a speedup in the range of 1.91--2.94X for the volume settings we used. Major performance factors, including ray origins, volume size, and pyramid size, are also analyzed. The proposed technique was also found to be able to give a speedup of 1.61--2.17X over the original GPU implementation of the CCCS algorithm. Our experiment results indicate that the proposed approach is capable of offering better coalesced memory access which eventually boosts the raytracing performance on GPU. Moreover, our approach is conceptually simple and can be readily included into various applications.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "42", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Liang:2013:AAF, author = "Yun Liang and Tulika Mitra", title = "An analytical approach for fast and accurate design space exploration of instruction caches", journal = j-TECS, volume = "13", number = "3", pages = "43:1--43:??", month = dec, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2539036.2539039", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Dec 18 19:07:39 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Application-specific system-on-chip platforms create the opportunity to customize the cache configuration for optimal performance with minimal chip area. Simulation, in particular trace-driven simulation, is widely used to estimate cache hit rates. 
However, simulation is too slow to be deployed in design space exploration, especially when there are hundreds of design points and the traces are huge. In this article, we propose a novel analytical approach for design space exploration of instruction caches. Given the program control flow graph (CFG) annotated only with basic block and control flow edge execution counts, we first model the cache states at each point of the CFG in a probabilistic manner. Then, we exploit the structural similarities among related cache configurations to estimate the cache hit rates for multiple cache configurations in one pass. Experimental results indicate that our analysis is 28--2,500 times faster compared to the fastest known cache simulator while maintaining high accuracy (0.2\% average error) in estimating cache hit rates for a large set of popular benchmarks. Moreover, compared to a state-of-the-art cache design space exploration technique, our approach achieves 304--8,086 times speedup and saves up to 62\% (average 7\%) energy for the evaluated benchmarks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "43", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Bourke:2013:AES, author = "Timothy Bourke and Arcot Sowmya", title = "Analyzing an embedded sensor with timed automata in {Uppaal}", journal = j-TECS, volume = "13", number = "3", pages = "44:1--44:??", month = dec, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2539036.2539040", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Dec 18 19:07:39 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "An infrared sensor is modeled and analyzed in Uppaal. The sensor typifies the sort of component that engineers regularly integrate into larger systems by writing interface hardware and software. 
In all, three main models are developed. In the first model, the timing diagram of the sensor is interpreted and modeled as a timed safety automaton. This model serves as a specification for the complete system. A second model that emphasizes the separate roles of driver and sensor is then developed. It is validated against the timing diagram model using an existing construction that permits the verification of timed trace inclusion, for certain models, by reachability analysis (i.e., model checking). A transmission correctness property is also stated by means of an auxiliary automaton and shown to be satisfied by the model. A third model is created from an assembly language driver program, using a direct translation from the instruction set of a processor with simple timing behavior. This model is validated against the driver component of the second timing diagram model using the timed trace inclusion validation technique. The approach and its limitations offer insight into the nature and challenges of programming in real time.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "44", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Collins:2013:FFS, author = "Rebecca L. Collins and Luca P. Carloni", title = "Flexible filters in stream programs", journal = j-TECS, volume = "13", number = "3", pages = "45:1--45:??", month = dec, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2539036.2539041", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Dec 18 19:07:39 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The stream-processing model is a natural fit for multicore systems because it exposes the inherent locality and concurrency of a program and highlights its separable tasks for efficient parallel implementations. 
We present flexible filters, a load-balancing optimization technique for stream programs. Flexible filters utilize the programmability of the cores in order to improve the data-processing throughput of individual bottleneck tasks by ``borrowing'' resources from neighbors in the stream. Our technique is distributed and scalable because all runtime load-balancing decisions are based on point-to-point handshake signals exchanged between neighboring cores. Load balancing with flexible filters increases the system-level processing throughput of stream applications, particularly those with large dynamic variations in the computational load of their tasks. We empirically evaluate flexible filters in a homogeneous multicore environment over a suite of five real-world stream programs.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "45", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hashemi:2013:TMF, author = "Matin Hashemi and Mohammad H. Foroozannejad and Soheil Ghiasi", title = "Throughput-memory footprint trade-off in synthesis of streaming software on embedded multiprocessors", journal = j-TECS, volume = "13", number = "3", pages = "46:1--46:??", month = dec, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2539036.2539042", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Dec 18 19:07:39 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We study the trade-off between throughput and memory footprint of embedded software that is synthesized from acyclic static dataflow (task graph) specifications targeting distributed memory multiprocessors. We identify iteration overlapping as a knob in the synthesis process by which one can trade application throughput for its memory requirement. 
Given an initial processor assignment and non-overlapped task schedule, we formally present underlying properties of the problem, such as constraints on a valid iteration overlapping, maximum possible throughput, and minimum memory footprint. Moreover, we develop an effective algorithm for generation of a rich set of design points that provide a range of trade-off options. Experimental results on a number of applications and architectures validate the effectiveness of our approach.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "46", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Biswas:2013:RTS, author = "Swarnendu Biswas and Rajib Mall and Manoranjan Satpathy", title = "A regression test selection technique for embedded software", journal = j-TECS, volume = "13", number = "3", pages = "47:1--47:??", month = dec, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2539036.2539043", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Dec 18 19:07:39 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The current approaches for regression test selection of embedded programs are usually based on data- and control-dependency analyses, often augmented with human reasoning. Existing techniques do not take into account additional execution dependencies which may exist among code elements in such programs due to features such as tasks, task deadlines, task precedences, and intertask communications. In this context, we propose a model-based regression test selection technique for such programs. 
Our technique first constructs a graph model of the program; the proposed graph model has been designed to capture several characteristics of embedded programs, such as task precedence order, priority, intertask communication, timers, exceptions and interrupt handlers, which we consider important for regression-test selection. Our regression test selection technique selects test cases based on an analysis of the constructed graph model. We have implemented our technique to realize a prototype tool. The experimental results obtained using this tool show that, on average, our approach selects about 28.33\% more regression test cases than those selected by a traditional approach. We observed that, on average, 36.36\% of the fault-revealing test cases were overlooked by the existing regression test selection technique.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "47", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Majumdar:2013:TRO, author = "Rupak Majumdar and Elaine Render and Paulo Tabuada", title = "A theory of robust omega-regular software synthesis", journal = j-TECS, volume = "13", number = "3", pages = "48:1--48:??", month = dec, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2539036.2539044", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Dec 18 19:07:39 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "A key property for systems subject to uncertainty in their operating environment is robustness: ensuring that unmodeled but bounded disturbances have only a proportionally bounded effect upon the behaviors of the system. 
Inspired by ideas from robust control and dissipative systems theory, we present a formal definition of robustness as well as algorithmic tools for the design of optimally robust controllers for $ \omega $-regular properties on discrete transition systems. Formally, we define metric automata --- automata equipped with a metric on states --- and strategies on metric automata which guarantee robustness for $ \omega $-regular properties. We present fixed-point algorithms to construct optimally robust strategies in polynomial time. In contrast to strategies computed by classical graph theoretic approaches, the strategies computed by our algorithm ensure that the behaviors of the controlled system gracefully degrade under the action of disturbances; the degree of degradation is parameterized by the magnitude of the disturbance. We show an application of our theory to the design of controllers that tolerate infinitely many transient errors provided they occur infrequently enough.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "48", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{You:2013:EAC, author = "Yi-Ping You and Shen-Hong Wang", title = "Energy-aware code motion for {GPU} shader processors", journal = j-TECS, volume = "13", number = "3", pages = "49:1--49:??", month = dec, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2539036.2539045", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Dec 18 19:07:39 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Graphics processing units (GPUs) are now being widely adopted in system-on-a-chip designs, and they are often used in embedded systems for manipulating computer graphics or even for general-purpose computation. Energy management is of concern to both hardware and software designers. 
In this article, we present an energy-aware code-motion framework for a compiler to generate concentrated accesses to input and output (I/O) buffers inside a GPU. Our solution attempts to gather the I/O buffer accesses into clusters, thereby extending the time period during which the I/O buffers are clock or power gated. We performed experiments in which the energy consumption was simulated by incorporating our compiler-analysis and code-motion framework into an in-house compiler tool. The experimental results demonstrated that our mechanisms were effective in reducing the energy consumption of the shader processor by an average of 13.1\% and decreasing the energy-delay product by 2.2\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "49", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Liu:2013:RAE, author = "Tiantian Liu and Alex Orailoglu and Chun Jason Xue and Minming Li", title = "Register allocation for embedded systems to simultaneously reduce energy and temperature on registers", journal = j-TECS, volume = "13", number = "3", pages = "50:1--50:??", month = dec, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2539036.2539046", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Dec 18 19:07:39 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Energy and thermal issues are two important concerns for embedded system design. Diminished energy dissipation leads to a longer battery life, while reduced temperature hotspots decelerate the physical failure mechanisms. The instruction fetch logic associated with register access has a significant contribution towards the total energy consumption. 
Meanwhile, the register file has also been previously shown to exhibit the highest temperature compared to the rest of the components in an embedded processor. Therefore, the optimization of energy and the resolution of the thermal issue for register accesses are of great significance. In this article, register allocation techniques are studied to simultaneously reduce energy consumption and heat buildup on register accesses for embedded systems. Contrary to prevailing intuition, we observe that optimizing energy and optimizing temperature on register accesses conflict with each other. We introduce a rotator hardware in the instruction decoder to facilitate a balanced solution for the two conflicting objectives. Algorithms for register allocation and refinement are proposed based on the access patterns and the effects of the rotator. Experimental results show that the proposed algorithms obtain notable improvements of energy and peak temperature for embedded applications.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "50", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lizarraga:2013:DPF, author = "Adrian Lizarraga and Roman Lysecky and Susan Lysecky and Ann Gordon-Ross", title = "Dynamic profiling and fuzzy-logic-based optimization of sensor network platforms", journal = j-TECS, volume = "13", number = "3", pages = "51:1--51:??", month = dec, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2539036.2539047", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Dec 18 19:07:39 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The commercialization of sensor-based platforms is facilitating the realization of numerous sensor network applications with diverse application requirements. 
However, sensor network platforms are becoming increasingly complex to design and optimize due to the multitude of interdependent parameters that must be considered. To further complicate matters, application experts oftentimes are not trained engineers, but rather biologists, teachers, or agriculturists who wish to utilize the sensor-based platforms for various domain-specific tasks. To assist both platform developers and application experts, we present a centralized dynamic profiling and optimization platform for sensor-based systems that enables application experts to rapidly optimize a sensor network for a particular application without requiring extensive knowledge of, and experience with, the underlying physical hardware platform. In this article, we present an optimization framework that allows developers to characterize application requirements through high-level design metrics and fuzzy-logic-based optimization. We further analyze the benefits of utilizing dynamic profiling information to eliminate the guesswork of creating a ``good'' benchmark, present several reoptimization evaluation algorithms used to detect if re-optimization is necessary, and highlight the benefits of the proposed dynamic optimization framework compared to static optimization alternatives.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "51", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ben-Asher:2013:BUV, author = "Yosi Ben-Asher and Nadav Rotem", title = "The benefits of using variable-length pipelined operations in high-level synthesis", journal = j-TECS, volume = "13", number = "3", pages = "52:1--52:??", month = dec, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2539036.2539048", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Dec 18 19:07:39 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Current high-level synthesis systems synthesize arithmetic units of a fixed known number of stages, and the scheduler mainly determines when units are activated. We focus on scheduling techniques for the high-level synthesis of pipelined arithmetic units where the number of stages of these operations is a free parameter of the synthesis. This problem is motivated by the ability to automatically create pipelined functional units, such as multipliers, with different pipe lengths. These units have different characteristics in terms of parallelism level, clock latency, frequency, etc. This article presents the Variable-length Pipeline Scheduler (VPS). The ability to synthesize variable-length pipelined units expands the known scheduling problem of high-level synthesis to include a search for a minimal number of hardware units (operations) and their desired number of stages. The proposed search procedure is based on algorithms that find a local minimum in a $ d $-dimensional grid, thus avoiding the need to evaluate all possible points in the space. We have implemented a C language compiler for VPS targeting FPGAs. Our results demonstrate that using variable-length pipeline units can reduce the overall resource usage and improve the execution time when synthesized onto an FPGA. 
The proposed search is sufficiently fast, taking only a few seconds, allowing an interactive mode of work. A comparison with xPilot shows a significant saving of hardware resources while maintaining comparable execution times of the resulting circuits. This work is an extension of a previous paper [Ben-Asher and Rotem 2008].", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "52", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chang:2013:RDD, author = "Yu-Ming Chang and Pi-Cheng Hsiu and Yuan-Hao Chang and Che-Wei Chang", title = "A resource-driven {DVFS} scheme for smart handheld devices", journal = j-TECS, volume = "13", number = "3", pages = "53:1--53:??", month = dec, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2539036.2539049", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Dec 18 19:07:39 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Reducing the energy consumption of the emerging genre of smart handheld devices while simultaneously maintaining mobile applications and services is a major challenge. This work is inspired by an observation on the resource usage patterns of mobile applications. In contrast to existing DVFS scheduling algorithms and history-based prediction techniques, we propose a resource-driven DVFS scheme in which resource state machines are designed to model the resource usage patterns in an online fashion to guide DVFS. We have implemented the proposed scheme on Android smartphones and conducted experiments based on real-world applications. The results are very encouraging and demonstrate the efficacy of the proposed scheme.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "53", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kyrkou:2013:HAR, author = "Christos Kyrkou and Christos Ttofis and Theocharis Theocharides", title = "A hardware architecture for real-time object detection using depth and edge information", journal = j-TECS, volume = "13", number = "3", pages = "54:1--54:??", month = dec, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2539036.2539050", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Dec 18 19:07:39 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Emerging embedded 3D vision systems for robotics and security applications utilize object detection to perform video analysis in order to intelligently interact with their host environment and take appropriate actions. Such systems have high performance and high detection-accuracy demands, while requiring low energy consumption, especially when dealing with embedded mobile systems. However, there is a large image search space involved in object detection, primarily because of the different sizes in which an object may appear, which makes it difficult to meet these demands. Hence, it is possible to meet such constraints by reducing the search space involved in object detection. To this end, this article proposes a depth and edge accelerated search method and a dedicated hardware architecture that implements it to provide an efficient platform for generic real-time object detection. The hardware integration of depth and edge processing mechanisms, with a support vector machine classification core onto an FPGA platform, results in significant speed-ups and improved detection accuracy. 
The proposed architecture was evaluated using images of various sizes, with results indicating that the proposed architecture is capable of achieving real-time frame rates for a variety of image sizes (271 fps for 320 $ \times $ 240, 42 fps for 640 $ \times $ 480, and 23 fps for 800 $ \times $ 600) compared to existing works, while reducing the false-positive rate by 52\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "54", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chang:2013:ALC, author = "Li-Pin Chang and Tung-Yang Chou and Li-Chun Huang", title = "An adaptive, low-cost wear-leveling algorithm for multichannel solid-state disks", journal = j-TECS, volume = "13", number = "3", pages = "55:1--55:??", month = dec, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2539036.2539051", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Dec 18 19:07:39 MST 2013", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Multilevel flash memory cells double or even triple storage density, producing affordable solid-state disks for end users. As flash memory endures only limited program-erase cycles, solid-state disks employ wear-leveling methods to prevent any portions of flash memory from being retired prematurely. Modern solid-state disks must consider wear evenness at both block and channel levels. This study first presents a block-level wear-leveling method whose design has two new ideas. First, the proposed method reuses the intelligence available in flash-translation layers so it does not require any new data structures. Second, it adaptively tunes the threshold of block-level wear leveling according to the runtime write pattern. 
This study further introduces a new channel-level wear-leveling strategy, because block-level wear leveling is confined to a channel, but realistic workloads do not evenly write all channels. The proposed method swaps logical blocks among channels for achieving an eventually-even state of channel lifetimes. A series of trace-driven simulations show that our wear-leveling method outperforms existing approaches in terms of wear evenness and overhead reduction.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "55", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Shukla:2014:EES, author = "Sandeep K. Shukla", title = "Editorial: Embedded systems --- more than methodology", journal = j-TECS, volume = "13", number = "3s", pages = "99:1--99:??", month = mar, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2587894", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Mar 24 17:17:02 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "99", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Daneshtalab:2014:ESI, author = "Masoud Daneshtalab and Maurizio Palesi and Juha Plosila", title = "Editorial: Special issue on design challenges for many-core processors", journal = j-TECS, volume = "13", number = "3s", pages = "100:1--100:??", month = mar, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2567941", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Mar 24 17:17:02 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "100", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Koohi:2014:TSL, author = "Somayyeh Koohi and Yawei Yin and Shaahin Hessabi and S. J. Ben Yoo", title = "Towards a scalable, low-power all-optical architecture for networks-on-chip", journal = j-TECS, volume = "13", number = "3s", pages = "101:1--101:??", month = mar, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2567930", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Mar 24 17:17:02 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article proposes a scalable wavelength-routed optical Network on Chip (NoC) based on the Spidergon topology, named Power-efficient Scalable Wavelength-routed Network-on-chip (PeSWaN). The key idea of the proposed all-optical architecture is the utilization of per-receiver wavelengths in the data network to prevent network contention and the adoption of per-sender wavelengths in the control network to avoid end-point contention. By performing a series of simulations, we study the efficiency of the proposed architecture, its power and energy consumption, and the data transmission delay. Moreover, we compare the proposed architecture with electrical NoCs and alternative ONoC architectures under various traffic patterns.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "101", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lhuillier:2014:HHA, author = "Yves Lhuillier and Maroun Ojail and Alexandre Guerre and Jean-Marc Philippe and Karim Ben Chehida and Farhat Thabet and Caaliph Andriamisaina and Chafic Jaber and Rapha{\"e}l David", title = "{HARS}: a hardware-assisted runtime software for embedded many-core architectures", journal = j-TECS, volume = "13", number = "3s", pages = "102:1--102:??", month = mar, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2517311", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Mar 24 17:17:02 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The current trend in embedded computing consists in increasing the number of processing resources on a chip. Following this paradigm, cluster-based many-core accelerators with a shared hierarchical memory have emerged. Handling synchronizations on these architectures is critical since parallel implementations speed-ups of embedded applications strongly depend on the ability to exploit the largest possible number of cores while limiting task management overhead. This article presents the combination of a low-overhead complete runtime software and a flexible hardware accelerator for synchronizations called HARS (Hardware-Assisted Runtime Software). Experiments on a multicore test chip showed that the hardware accelerator for synchronizations has less than 1\% area overhead compared to a cluster of the chip while reducing synchronization latencies (up to 2.8 times compared to a test-and-set implementation) and contentions. The runtime software part offers basic features like memory management but also optimized execution engines to allow the easy and efficient extraction of the parallelism in applications with multiple programming models. 
By using the hardware acceleration as well as a very low overhead task scheduling software technique, we show that HARS outperforms an optimized state-of-the-art task scheduler by 13\% for the execution of a parallel application.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "102", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Yang:2014:CTR, author = "Qiang Yang and Jian Fu and Raphael Poss and Chris Jesshope", title = "On-chip traffic regulation to reduce coherence protocol cost on a microthreaded many-core architecture with distributed caches", journal = j-TECS, volume = "13", number = "3s", pages = "103:1--103:??", month = mar, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2567931", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Mar 24 17:17:02 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "When hardware cache coherence scales to many cores on chip, over saturated traffic of the shared memory system may offset the benefit from massive hardware concurrency. In this article, we investigate the cost of a write-update protocol in terms of on-chip memory network traffic and its adverse effects on the system performance based on a multithreaded many-core architecture with distributed caches. We discuss possible software and hardware solutions to alleviate the network pressure. We find that in the context of massive concurrency, by introducing a write-merging buffer with 0.46\% area overhead to each core, applications with good locality and concurrency are boosted up by 18.74\% in performance on average. Other applications also benefit from this addition and even achieve a throughput increase of 5.93\%. 
In addition, this improvement indicates that higher levels of concurrency per core can be exploited without impacting performance, thus tolerating latency better and giving higher processor efficiencies compared to other solutions.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "103", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Parikh:2014:FCF, author = "Ritesh Parikh and Valeria Bertacco", title = "{ForEVeR}: a complementary formal and runtime verification approach to correct {NoC} functionality", journal = j-TECS, volume = "13", number = "3s", pages = "104:1--104:??", month = mar, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2514871", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Mar 24 17:17:02 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "As silicon technology scales, modern processor and embedded systems are rapidly shifting towards complex chip multi-processor (CMP) and system-on-chip (SoC) designs. As a side effect of complexity of these designs, ensuring their correctness has become increasingly problematic. Within these domains, Network-on-Chips (NoCs) are a de-facto choice to implement on-chip interconnect; their design is quickly becoming extremely complex in order to keep up with communication performance demands. As a result, design errors in the NoC may go undetected and escape into the final silicon. In this work, we propose ForEVeR, a solution that complements the use of formal methods and runtime verification to ensure functional correctness in NoCs. Formal verification, due to its scalability limitations, is used to verify smaller modules, such as individual router components. 
To deliver correctness guarantees for the complete network, we propose a network-level detection and recovery solution that monitors the traffic in the NoC and protects it against escaped functional bugs. To this end, ForEVeR augments the baseline NoC with a lightweight checker network that alerts destination nodes of incoming packets ahead of time. If a bug is detected, flagged by missed packet arrivals, our recovery mechanism delivers the in-flight data safely to the intended destination via the checker network. ForEVeR's experimental evaluation shows that it can recover from NoC design errors at only 4.9\% area cost for an $ 8 \times 8 $ mesh interconnect, over a time interval ranging from 0.5K to 30K cycles per recovery event, and it incurs no performance overhead in the absence of errors. ForEVeR can also protect NoC operations against soft-errors: a growing concern with the scaling of silicon. ForEVeR leverages the same monitoring hardware to detect soft-error manifestations, in addition to design-errors. Recovery of the soft-error affected packets is guaranteed by building resiliency features into our checker network. ForEVeR incurs minimal performance penalty up to a flit error rate of 0.01\% in lightly loaded networks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "104", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{DelBarrio:2014:ULP, author = "Alberto A. 
{Del Barrio} and Nader Bagherzadeh and Rom{\'a}n Hermida", title = "Ultra-low-power adder stage design for exascale floating point units", journal = j-TECS, volume = "13", number = "3s", pages = "105:1--105:??", month = mar, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2567932", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Mar 24 17:17:02 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/fparith.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Currently, the most powerful supercomputers can provide tens of petaflops. Future many-core systems are estimated to provide an exaflop. However, the power budget limitation makes these machines still infeasible and unaffordable. Floating Point Units (FPUs) are critical from both the power consumption and performance points of view of today's microprocessors and supercomputers. Literature offers very different designs. Some of them are focused on increasing performance no matter the penalty, and others on decreasing power at the expense of lower performance. In this article, we propose a novel approach for reducing the power of the FPU without degrading the rest of parameters. Concretely, this power reduction is also accompanied by an area reduction and a performance improvement. Hence, an overall energy gain will be produced. According to our experiments, our proposed unit consumes 17.5\%, 23\% and 16.5\% less energy for single, double and quadruple precision, with an additional 15\%, 21.5\% and 14.5\% delay reduction, respectively. Furthermore, area is also diminished by 4\%, 4.5\% and 5\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "105", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Huang:2014:YES, author = "Yu-Jen Huang and Jin-Fu Li", title = "Yield-enhancement schemes for multicore processor and memory stacked {$3$D ICs}", journal = j-TECS, volume = "13", number = "3s", pages = "106:1--106:??", month = mar, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2567933", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Mar 24 17:17:02 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "A three-dimensional (3D) integrated circuit (IC) with multiple dies vertically connected by through-silicon-via (TSV) offers many benefits over current 2D ICs. Multicore logic-memory die stacking has been considered as one candidate for 3D ICs by utilizing the TSV to provide high data bandwidth between logic and memory. However, 3D ICs suffer from the low-yield issue. This article proposes effective yield-enhancement techniques for multicore die-stacked 3D ICs. Two reconfiguration schemes are proposed to logically swap the positions of cores in the dies of 3D ICs such that the yield of 3D ICs is increased. Two algorithms also are proposed to determine the reconfiguration effectively. Simulation results show that the proposed reconfiguration schemes can achieve a yield gain ranging from 1\% to 11\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "106", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Arnold:2014:TPH, author = "Oliver Arnold and Emil Matus and Benedikt Noethen and Markus Winter and Torsten Limberg and Gerhard Fettweis", title = "{Tomahawk}: Parallelism and heterogeneity in communications signal processing {MPSoCs}", journal = j-TECS, volume = "13", number = "3s", pages = "107:1--107:??", month = mar, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2517087", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Mar 24 17:17:02 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Heterogeneity and parallelism in MPSoCs for 4G (and beyond) communications signal processing are inevitable in order to meet stringent power constraints and performance requirements. The question arises on how to cope with the problem of system programmability and runtime management incurred by the statically or even dynamically varying number and type of processing elements. This work addresses this challenge by proposing the concept of a heterogeneous many-core platform called Tomahawk. Apart from the definition of the system architecture, in this approach a unified framework including a model of computation, a programming interface and a dedicated runtime management unit called CoreManager is proposed. The increase of system complexity in terms of application parallelism and number of resources may lead to a dramatic increase of the management costs, hence causing performance degradation. For this reason, the efficient implementation of the CoreManager becomes a major issue in system design. This work compares the performance and capabilities of various CoreManager HW/SW solutions, based on ASIC, RISC and ASIP paradigms. 
The results demonstrate that the proposed ASIP-based solution approaches the performance of the ASIC realization, while preserving the full flexibility of the software (RISC-based) implementation.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "107", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Jin:2014:PPA, author = "Yuho Jin and Timothy Mark Pinkston", title = "{PAIS}: Parallelism-aware interconnect scheduling in multicores", journal = j-TECS, volume = "13", number = "3s", pages = "108:1--108:??", month = mar, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2567934", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Mar 24 17:17:02 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Multicore processors have the potential to deliver scalable performance by distributing computation across multiple cores. However, the communication cost of parallel application thread execution may significantly limit the performance achievable due to latency and contention on shared resources in the on-chip network of multicores experienced by packets from critical threads. We present PAIS, Parallelism-Aware Interconnect Scheduling, that bolsters performance and energy efficiency of parallel applications. PAIS dynamically detects thread execution progress based on communication latency and scheduling, and it accelerates communication for slowly executing threads by prioritizing packets from those threads with flow control and priority-based arbitration.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "108", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Casu:2014:UMI, author = "Mario R. 
Casu and Francesco Colonna and Marco Crepaldi and Danilo Demarchi and Mariagrazia Graziano and Maurizio Zamboni", title = "{UWB} microwave imaging for breast cancer detection: Many-core, {GPU}, or {FPGA?}", journal = j-TECS, volume = "13", number = "3s", pages = "109:1--109:??", month = mar, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2530534", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Mar 24 17:17:02 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "An UWB microwave imaging system for breast cancer detection consists of antennas, transceivers, and a high-performance embedded system for elaborating the received signals and reconstructing breast images. In this article we focus on this embedded system. To accelerate the image reconstruction, the Beamforming phase has to be implemented in a parallel fashion. We assess its implementation in three currently available high-end platforms based on a multicore CPU, a GPU, and an FPGA, respectively. We then project the results applying technology scaling rules to future many-core CPUs, many-thread GPUs, and advanced FPGAs. We consider an optimistic case in which available resources increase according to Moore's law only, and a pessimistic case in which only a fraction of those resources are available due to a limited power budget. In both scenarios, an implementation that includes a high-end FPGA outperforms the other alternatives. Since the number of effectively usable cores in future many-cores will be power-limited, and there is a trend toward the integration of power-efficient accelerators, we conjecture that a chip consisting of a many-core section and a reconfigurable logic section will be the perfect platform for this application.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "109", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Palesi:2014:ESS, author = "Maurizio Palesi and Todor Stefanov", title = "Editorial: Special Section on {ESTIMedia'13}", journal = j-TECS, volume = "13", number = "3s", pages = "110:1--110:??", month = mar, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2567942", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Mar 24 17:17:02 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "110", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chen:2014:EOR, author = "Gang Chen and Kai Huang and Alois Knoll", title = "Energy optimization for real-time multiprocessor system-on-chip with optimal {DVFS} and {DPM} combination", journal = j-TECS, volume = "13", number = "3s", pages = "111:1--111:??", month = mar, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2567935", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Mar 24 17:17:02 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Energy optimization is a critical design concern for embedded systems. Combining DVFS+DPM is considered as one preferable technique to reduce energy consumption. There have been optimal DVFS+DPM algorithms for periodic independent tasks running on uniprocessor in the literature. Optimal combination of DVFS and DPM for periodic dependent tasks on multicore systems is however not yet reported. The challenge of this problem is that the idle intervals of cores are not easy to model.
In this article, a novel technique is proposed to directly model the idle intervals of individual cores such that both DVFS and DPM can be optimized at the same time. Based on this technique, the energy optimization problem is formulated by means of mixed integrated linear programming. We also present techniques to prune the exploration space of the formulation. Experimental results using real-world benchmarks demonstrate the effectiveness of our approach compared to existing approaches.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "111", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Abdel-Khalek:2014:PSP, author = "Rawan Abdel-Khalek and Valeria Bertacco", title = "Post-silicon platform for the functional diagnosis and debug of networks-on-chip", journal = j-TECS, volume = "13", number = "3s", pages = "112:1--112:??", month = mar, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2567936", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Mar 24 17:17:02 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The increasing number of units in today's systems-on-chip and multicore processors has led to complex intra-chip communication solutions. Specifically, Networks-on-Chip (NoCs) have emerged as a favorable fabric to provide high bandwidth and low latency in connecting many units in a same chip. To achieve these goals, the NoC often includes complex components and advanced features, leading to the development of large and highly complex interconnect subsystems. One of the biggest challenges in these designs is to ensure the correct functionality of this communication infrastructure. 
To support this goal, an increasing fraction of the validation effort has shifted to post-silicon validation, because it permits exercising network activities that are too complex to be validated in pre-silicon. However, post-silicon validation is hindered by the lack of observability of the network's internal operations and thus, diagnosing functional errors during this phase is very difficult. In this work, we propose a post-silicon validation platform that improves observability of network operations by taking periodic snapshots of the traffic traversing the network. Each node's local cache is configured to temporarily store the snapshot logs in a designated area reserved for post-silicon validation and relinquished after product release. Each snapshot log is analyzed locally by a software algorithm running on its corresponding core, in order to detect functional errors. Upon error detection, all snapshot logs are aggregated at a central location to extract additional debug data, including an overview of network traffic surrounding the error event, as well as a partial reconstruction of the routes followed by packets in flight at the time. In our experiments, we found that this approach allows us to detect several types of functional errors, as well as observe, on average, over 50\% of the network's traffic and reconstruct at least half of each of their routes through the network.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "112", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Dasari:2014:NCA, author = "Dakshina Dasari and Borislav Nikoli{\'c} and Vincent N{\'e}lis and Stefan M. 
Petters", title = "{NoC} contention analysis using a branch-and-prune algorithm", journal = j-TECS, volume = "13", number = "3s", pages = "113:1--113:??", month = mar, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2567937", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Mar 24 17:17:02 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "``Many-core'' systems based on a Network-on-Chip (NoC) architecture offer various opportunities in terms of performance and computing capabilities, but at the same time they pose many challenges for the deployment of real-time systems, which must fulfill specific timing requirements at runtime. It is therefore essential to identify, at design time, the parameters that have an impact on the execution time of the tasks deployed on these systems and the upper bounds on the other key parameters. The focus of this work is to determine an upper bound on the traversal time of a packet when it is transmitted over the NoC infrastructure. Towards this aim, we first identify and explore some limitations in the existing recursive-calculus-based approaches to compute the Worst-Case Traversal Time (WCTT) of a packet. Then, we extend the existing model by integrating the characteristics of the tasks that generate the packets. For this extended model, we propose an algorithm called ``Branch and Prune'' (BP). Our proposed method provides tighter and safe estimates than the existing recursive-calculus-based approaches. Finally, we introduce a more general approach, namely ``Branch, Prune and Collapse'' (BPC) which offers a configurable parameter that provides a flexible trade-off between the computational complexity and the tightness of the computed estimate. The recursive-calculus methods and BP present two special cases of BPC when a trade-off parameter is $1$ or $ \infty $, respectively. 
Through simulations, we analyze this trade-off, reason about the implications of certain choices, and also provide some case studies to observe the impact of task parameters on the WCTT estimates.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "113", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lashgar:2014:HHI, author = "Ahmad Lashgar and Ahmad Khonsari and Amirali Baniasadi", title = "{HARP}: {Harnessing inActive thReads in many-core Processors}", journal = j-TECS, volume = "13", number = "3s", pages = "114:1--114:??", month = mar, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2567938", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Mar 24 17:17:02 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "SIMT accelerators are equipped with thousands of computational resources. Conventional accelerators, however, fail to fully utilize available resources due to branch and memory divergences. This underutilization is manifested in two underlying inefficiencies: pipeline width underutilization and pipeline depth underutilization. Width underutilization occurs when SIMD execution units are not entirely utilized due to branch divergences. This affects lane activity and results in SIMD inefficiency. Depth underutilization takes place when the pipeline runs out of active threads and is forced to leave pipeline stages idle. This work addresses both inefficiencies by harnessing inactive threads available to the pipeline. We introduce Harnessing inActive thReads in many-core Processors (or simply HARP) to improve width and depth utilization in accelerators. We show how using inactive yet ready threads can enhance performance. Moreover, we investigate implementation details and study microarchitectural changes needed to build a HARP-enhanced accelerator. 
Furthermore, we evaluate HARP under a variety of microarchitectural design points. We measure the area overhead associated with HARP and compare to conventional alternatives. Under Fermi-like GPUs, we show that HARP provides 10\% speedup on average (maximum of 1.6X) at the cost of 3.5\% area overhead. Our analysis shows that HARP performs better under narrower SIMD and shorter pipelines.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "114", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Banaiyanmofrad:2014:NBF, author = "Abbas Banaiyanmofrad and Gustavo Gir{\~a}o and Nikil Dutt", title = "{NoC}-based fault-tolerant cache design in chip multiprocessors", journal = j-TECS, volume = "13", number = "3s", pages = "115:1--115:??", month = mar, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2567939", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Mar 24 17:17:02 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Advances in technology scaling increasingly make emerging Chip MultiProcessor (CMP) platforms more susceptible to failures that cause various reliability challenges. In such platforms, error-prone on-chip memories (caches) continue to dominate the chip area. Also, Network-on-Chip (NoC) fabrics are increasingly used to manage the scalability of these architectures. We present a novel solution for efficient implementation of fault-tolerant design of Last-Level Cache (LLC) in CMP architectures. The proposed approach leverages the interconnection network fabric to protect the LLC cache banks against permanent faults in an efficient and scalable way. During an LLC access to a faulty block, the network detects and corrects the faults, returning the fault-free data to the requesting core. 
Leveraging the NoC interconnection fabric, designers can implement any cache fault-tolerant scheme in an efficient, modular, and scalable manner for emerging multicore/manycore platforms. We propose four different policies for implementing a remapping-based fault-tolerant scheme leveraging the NoC fabric in different settings. The proposed policies enable design trade-offs between NoC traffic (packets sent through the network) and the intrinsic parallelism of these communication mechanisms, allowing designers to tune the system based on design constraints. We perform an extensive design space exploration on NoC benchmarks to demonstrate the usability and efficacy of our approach. In addition, we perform sensitivity analysis to observe the behavior of various policies in reaction to improvements in the NoC architecture. The overheads of leveraging the NoC fabric are minimal: on an 8-core, 16-cache-bank CMP we demonstrate reliable access to LLCs with additional overheads of less than 3\% in area and less than 7\% in power.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "115", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Bahirat:2014:MHP, author = "Shirish Bahirat and Sudeep Pasricha", title = "{METEOR}: Hybrid photonic ring-mesh network-on-chip for multicore architectures", journal = j-TECS, volume = "13", number = "3s", pages = "116:1--116:??", month = mar, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2567940", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Mar 24 17:17:02 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "With increasing application complexity and improvements in process technology, Chip MultiProcessors (CMPs) with tens to hundreds of cores on a chip are becoming a reality. 
Networks-on-Chip (NoCs) have emerged as scalable communication fabrics that can support high bandwidths for these massively parallel multicore systems. However, traditional electrical NoC implementations still need to overcome the challenges of high data transfer latencies and large power consumption. On-chip photonic interconnects with high performance-per-watt characteristics have recently been proposed as an alternative to address these challenges for intra-chip communication. In this article, we explore using low-cost photonic interconnects on a chip to enhance traditional electrical NoCs. Our proposed hybrid photonic ring-mesh NoC (METEOR) utilizes a configurable photonic ring waveguide coupled to a traditional 2D electrical mesh NoC. Experimental results indicate a strong motivation to consider the proposed architecture for future CMPs, as it can provide about 5$ \times $ reduction in power consumption and improved throughput and access latencies, compared to traditional electrical 2D mesh and torus NoC architectures. Compared to other previously proposed hybrid photonic NoC fabrics such as the hybrid photonic torus, Corona, and Firefly, our proposed fabric is also shown to have lower photonic area overhead, power consumption, and energy-delay product, while maintaining competitive throughput and latency.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "116", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Foglia:2014:ERI, author = "Pierfrancesco Foglia and Marco Solinas", title = "Exploiting replication to improve performances of {NUCA-based} {CMP} systems", journal = j-TECS, volume = "13", number = "3s", pages = "117:1--117:??", month = mar, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2566568", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Mar 24 17:17:02 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Improvements in semiconductor nanotechnology made chip multiprocessors the reference architecture for high-performance microprocessors. CMPs usually adopt large Last-Level Caches (LLC) shared among cores and private L1 caches, whose performances depend on the wire-delay dominated response time of LLC. NUCA (NonUniform Cache Architecture) caches represent a viable solution for tolerating wire-delay effects. In this article, we present Re-NUCA, a NUCA cache that exploits replication of blocks inside the LLC to avoid performance limitations of D-NUCA caches due to conflicting access to shared data. Results show that a Re-NUCA LLC permits to improve performances of more than 5\% on average, and up to 15\% for applications that strongly suffer from conflicting access to shared data, while reducing network traffic and power consumption with respect to D-NUCA caches. Besides, it outperforms different S-NUCA schemes optimized with victim replication.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "117", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Shukla:2014:EEE, author = "Sandeep K. 
Shukla", title = "Editorial: Embedded everywhere for everyone", journal = j-TECS, volume = "13", number = "4", pages = "74:1--74:??", month = feb, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2559122", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 11 18:33:06 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "74", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lam:2014:REC, author = "Siew-Kei Lam and Thambipillai Srikanthan and Christopher T. Clarke", title = "Rapid evaluation of custom instruction selection approaches with {FPGA} estimation", journal = j-TECS, volume = "13", number = "4", pages = "75:1--75:??", month = feb, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2560014", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 11 18:33:06 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The main aim of this article is to demonstrate that a fast and accurate FPGA estimation engine is indispensable in design flows for custom instruction (template) selection. The need for a FPGA estimation engine stems from the difficulty in predicting the FPGA performance measures of selected custom instructions. We will present a FPGA estimation technique that partitions the high-level representation of custom instructions into clusters based on the structural organization of the target FPGA, while taking into account general logic synthesis principles adopted by FPGA tools. In this work, we have evaluated a widely used graph covering algorithm with various heuristics for custom instruction selection. 
In addition, we present an algorithm called Refined Largest Fit First (RLFF) that relies on a graph covering heuristic to select non-overlapping superset templates, which typically incorporate frequently used basic templates. The initial solution is further refined by considering overlapping templates that were ignored previously to see if their introduction could lead to higher performance. While RLFF provides the most efficient cover compared to the ILP method and other graph covering heuristics, FPGA estimation results reveal that RLFF leads to the worst performance in certain applications. It is therefore a worthy proposition to equip design flows with accurate FPGA estimation in order to rapidly determine the most profitable custom instruction approach for a given application.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "75", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Maggio:2014:TSC, author = "Martina Maggio and Federico Terraneo and Alberto Leva", title = "Task scheduling: a control-theoretical viewpoint for a general and flexible solution", journal = j-TECS, volume = "13", number = "4", pages = "76:1--76:??", month = feb, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2560015", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 11 18:33:06 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article presents a new approach to the design of task scheduling algorithms, where system-theoretical methodologies are used throughout.
The proposal implies a significant perspective shift with respect to mainstream design practices, but yields large payoffs in terms of simplicity, flexibility, solution uniformity for different problems, and possibility to formally assess the results also in the presence of unpredictable run-time situations. A complete implementation example is illustrated, together with various comparative tests, and a methodological treatise of the matter.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "76", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Dong:2014:EEE, author = "Wei Dong and Yunhao Liu and Chun Chen and Lin Gu and Xiaofan Wu", title = "{Elon}: Enabling efficient and long-term reprogramming for wireless sensor networks", journal = j-TECS, volume = "13", number = "4", pages = "77:1--77:??", month = feb, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2560017", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 11 18:33:06 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We present a new mechanism called Elon for enabling efficient and long-term reprogramming in wireless sensor networks. Elon reduces the transferred code size significantly by introducing the concept of replaceable component. It avoids the cost of hardware reboot with a novel software reboot mechanism. Moreover, it significantly prolongs the reprogrammable lifetime (i.e., the time period during which the sensor nodes can be reprogrammed) by avoiding flash writes for TelosB nodes. Experimental results show that Elon transfers up to 120--389 times less information than Deluge, and 18--42 times less information than Stream. 
The software reboot mechanism that Elon applies reduces the rebooting cost by 50.4\%--53.87\% in terms of beacon packets, and 56.83\% in terms of unsynchronized nodes. In addition, Elon prolongs the reprogrammable lifetime by a factor of 3.3.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "77", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Li:2014:BAM, author = "Shuai Li and Yuesheng Lou and Bo Liu", title = "{Bluetooth} aided mobile phone localization: a nonlinear neural circuit approach", journal = j-TECS, volume = "13", number = "4", pages = "78:1--78:??", month = feb, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2560018", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 11 18:33:06 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "It is meaningful to design a strategy to roughly localize mobile phones without a GPS by exploiting existing conditions and devices especially in environments without GPS availability (e.g., tunnels, subway stations, etc.). The availability of Bluetooth devices for most phones and the existence of a number of GPS equipped phones in a crowd of phone users enable us to design a Bluetooth aided mobile phone localization strategy. With the position of GPS equipped phones as beacons, and with the Bluetooth connection between neighbor phones as proximity constraints, we formulate the problem into an inequality problem defined on the Bluetooth network. A recurrent neural network is developed to solve the problem distributively in real time. The convergence of the neural network and the solution feasibility to the defined problem are both theoretically proven. The hardware implementation architecture of the proposed neural network is also given in this article. 
As applications, rough localizations of drivers in a tunnel and localization of customers in a supermarket are explored and simulated. Simulations demonstrate the effectiveness of the proposed method.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "78", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hu:2014:MON, author = "Jingtong Hu and Qingfeng Zhuge and Chun Jason Xue and Wei-Che Tseng and Edwin H.-M. Sha", title = "Management and optimization for nonvolatile memory-based hybrid scratchpad memory on multicore embedded processors", journal = j-TECS, volume = "13", number = "4", pages = "79:1--79:??", month = feb, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2560019", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 11 18:33:06 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The recent emergence of various Non-Volatile Memories (NVMs), with many attractive characteristics such as low leakage power and high-density, provides us with a new way of addressing the memory power consumption problem. In this article, we target embedded CMPs, and propose a novel Hybrid Scratch Pad Memory (HSPM) architecture which consists of SRAM and NVM to take advantage of the ultra-low leakage power, high density of NVM, and fast access of SRAM. A novel data allocation algorithm as well as an algorithm to determine the NVM/SRAM ratio for the novel HSPM architecture are proposed. The experimental results show that the data allocation algorithm can reduce the memory access time by 33.51\% and the dynamic energy consumption by 16.81\% on average for the HSPM architecture when compared with a greedy algorithm. 
The NVM/SRAM size determination algorithm can further reduce the memory access time by 14.7\% and energy consumption by 20.1\% on average.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "79", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kim:2014:MBM, author = "Heeseok Kim and Dong-Guk Han and Seokhie Hong and Jaecheol Ha", title = "Message blinding method requiring no multiplicative inversion for {RSA}", journal = j-TECS, volume = "13", number = "4", pages = "80:1--80:??", month = feb, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2560020", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 11 18:33:06 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article proposes a new message blinding method requiring no multiplicative inversion for RSA. Most existing message blinding methods for RSA additionally require the multiplicative inversion, even though computational complexity of this operation is $ O(n^3) $ which is equal to that of the exponentiation. Thus, this additional operation is known to be the main drawback of the existing message blinding methods for RSA. In addition to requiring no additional multiplicative inversion, our new countermeasure provides the security against various power analysis attacks as well as general differential power analysis.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "80", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mahdavikhah:2014:MFP, author = "Behzad Mahdavikhah and Ramin Mafi and Shahin Sirouspour and Nicola Nicolici", title = "A multiple-{FPGA} parallel computing architecture for real-time simulation of soft-object deformation", journal = j-TECS, volume = "13", number = "4", pages = "81:1--81:??", month = feb, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2560031", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 11 18:33:06 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Hardware-based parallel computing is proposed for acceleration of finite-element (FE) analysis of linear elastic deformation models. An implementation of the Preconditioned Conjugate Gradient algorithm on N Field Programmable Gate Array (FPGA) devices solves the large linear system of equations arising from the FE discretization. The system employs a large number of customized fixed-point computing units with a high-throughput memory architecture. An implementation of this scalable architecture on four Altera EP3SE110 FPGA devices yields a peak performance of 604 Giga Operations per second. This enables haptic simulation of a 3-dimensional deformable object of 21000 elements at an update rate of 400Hz.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "81", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Axer:2014:BTP, author = "Philip Axer and Rolf Ernst and Heiko Falk and Alain Girault and Daniel Grund and Nan Guan and Bengt Jonsson and Peter Marwedel and Jan Reineke and Christine Rochange and Maurice Sebastian and Reinhard {Von Hanxleden} and Reinhard Wilhelm and Wang Yi", title = "Building timing predictable embedded systems", journal = j-TECS, volume = "13", number = "4", pages = "82:1--82:??", month = feb, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2560033", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 11 18:33:06 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "A large class of embedded systems is distinguished from general-purpose computing systems by the need to satisfy strict requirements on timing, often under constraints on available resources. Predictable system design is concerned with the challenge of building systems for which timing requirements can be guaranteed a priori. Perhaps paradoxically, this problem has become more difficult by the introduction of performance-enhancing architectural elements, such as caches, pipelines, and multithreading, which introduce a large degree of uncertainty and make guarantees harder to provide. The intention of this article is to summarize the current state of the art in research concerning how to build predictable yet performant systems. We suggest precise definitions for the concept of ``predictability'', and present predictability concerns at different abstraction levels in embedded system design. First, we consider timing predictability of processor instruction sets. 
Thereafter, we consider how programming languages can be equipped with predictable timing semantics, covering both a language-based approach using the synchronous programming paradigm, as well as an environment that provides timing semantics for a mainstream programming language (in this case C). We present techniques for achieving timing predictability on multicores. Finally, we discuss how to handle predictability at the level of networked embedded systems where randomly occurring errors must be considered.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "82", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Bathen:2014:ERC, author = "Luis Angel D. Bathen and Nikil D. Dutt", title = "Embedded {RAIDs}-on-chip for bus-based chip-multiprocessors", journal = j-TECS, volume = "13", number = "4", pages = "83:1--83:??", month = feb, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2533316", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 11 18:33:06 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The dual effects of larger die sizes and technology scaling, combined with aggressive voltage scaling for power reduction, increase the error rates for on-chip memories. Traditional on-chip memory reliability techniques (e.g., ECC) incur significant power and performance overheads. In this article, we propose a low-power-and-performance-overhead Embedded RAID (E-RAID) strategy and present Embedded RAIDs-on-Chip (E-RoC), a distributed dynamically managed reliable memory subsystem for bus-based Chip-Multiprocessors. E-RoC achieves reliability through redundancy by optimizing RAID-like policies tuned for on-chip distributed memories. 
We achieve on-chip reliability of memories through the use of Distributed Dynamic ScratchPad Allocatable Memories (DSPAMs) and their allocation policies. We exploit aggressive voltage scaling to reduce power consumption overheads due to parallel DSPAM accesses, and rely on the E-RoC Manager to automatically handle any resulting voltage-scaling-induced errors. We demonstrate how E-RAIDs can further enhance the fault tolerance of traditional memory reliability approaches by designing E-RAID levels that exploit ECC. Finally, we show the power and flexibility of the E-RoC concept by showing the benefits of having heterogeneous E-RAID levels that fit each application's needs (fault tolerance, power/energy, performance). Our experimental results on CHStone/Mediabench II benchmarks show that our E-RAID levels converge to 100\% error-free data rates much faster than traditional ECC approaches. Moreover, E-RAID levels that exploit ECC can guarantee 99.9\% error-free data rates at ultra low Vdd on average, whereas traditional ECC approaches were able to attain at most 99.1\% error-free data rates. We observe an average of 22\% dynamic power consumption increase by using traditional ECC approaches with respect to the baseline (non-voltage scaled SPMs), whereas our E-RAID levels are able to save dynamic power consumption by an average of 27\% (w.r.t. the same non-voltage scaled SPMs baseline), while incurring worst-case 2\% higher performance overheads than traditional ECC approaches. By voltage scaling the memories, we see that traditional ECC approaches are able to save static energy by 6.4\% (average), whereas our E-RAID approaches achieve 23.4\% static energy savings (average). Finally, we observe that mixing E-RAID levels allows us to further reduce the dynamic power consumption by up to 55.5\% at the cost of an average 5.6\% increase in execution time over traditional approaches.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "83", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Logaras:2014:PAE, author = "Evangelos Logaras and Orsalia G. Hazapis and Elias S. Manolakos", title = "{Python} to accelerate embedded {SoC} design: a case study for systems biology", journal = j-TECS, volume = "13", number = "4", pages = "84:1--84:??", month = feb, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2560032", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 11 18:33:06 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/python.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We present SysPy (System Python) a tool which exploits the strengths of the popular Python scripting language to boost design productivity of embedded System on Chips for FPGAs. SysPy acts as a ``glue'' software between mature HDLs, ready-to-use VHDL components and programmable processor soft IP cores. SysPy can be used to: (i) automatically translate hardware components described in Python into synthesizable VHDL, (ii) capture top-level structural descriptions of processor-centric SoCs in Python, (iii) implement all the steps necessary to compile the user's C code for an instruction set processor core and generate processor specific Tcl scripts that import to the design project all the necessary HDL files of the processor's description and instantiate/connect the core to other blocks in a synthesizable top-level Python description. Moreover, we have developed a Hardware Abstraction Layer (HAL) in Python which allows user applications running in a host PC to utilize effortlessly the SoC's resources in the FPGA. SysPy's design capabilities, when complemented with the developed HAL software API, provide all the necessary tools for hw/sw partitioning and iterative design for efficient SoC's performance tuning. 
We demonstrate how SysPy's design flow and functionalities can be used by building a processor-centric embedded SoC for computational systems biology. The designed SoC, implemented using a Xilinx Virtex-5 FPGA, combines the flexibility of a programmable soft processor core (Leon3) with the high performance of an application specific core to simulate flexibly and efficiently the stochastic behavior of large size biomolecular reaction networks. Such networks are essential for studying the dynamics of complex biological systems consisting of multiple interacting pathways.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "84", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Rodrigues:2014:LPI, author = "Rance Rodrigues and Arunachalam Annamalai and Sandip Kundu", title = "A low-power instruction replay mechanism for design of resilient microprocessors", journal = j-TECS, volume = "13", number = "4", pages = "85:1--85:??", month = feb, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2560034", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 11 18:33:06 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "There is a growing concern about the increasing rate of defects in computing substrates. Traditional redundancy solutions prove to be too expensive for commodity microprocessor systems. Modern microprocessors feature multiple execution units to take advantage of instruction level parallelism. However, most workloads do not exhibit the level of instruction level parallelism that a typical microprocessor is resourced for. This offers an opportunity to reexecute instructions using idle execution units. But, relying solely on idle resources will not provide full instruction coverage and there is a need to explore other alternatives. 
To that end, we propose and evaluate two instruction replay schemes within the same core for online testing of the execution units. One scheme (RER) reexecutes only the retired instructions, while the other (REI) reexecutes all the issued instructions. The complete proposed solution requires a comparator and minor modifications to control logic, resulting in negligible hardware overhead. Both soft and hard error detection are considered and the performance and energy impact of both schemes are evaluated and compared against previously proposed redundant execution schemes. Results show that even though the proposed schemes result in a small performance penalty when compared to previous work, the energy overhead is significantly reduced.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "85", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Tavana:2014:SHT, author = "Mohammad Khavari Tavana and Nasibeh Teimouri and Meisam Abdollahi and Maziar Goudarzi", title = "Simultaneous hardware and time redundancy with online task scheduling for low energy highly reliable standby-sparing system", journal = j-TECS, volume = "13", number = "4", pages = "86:1--86:??", month = feb, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2523781.2560035", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 11 18:33:06 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Standby-sparing is one of the common techniques in order to design fault-tolerant safety-critical systems where the high level of reliability is needed. Recently, the minimization of energy consumption in embedded systems has attracted a lot of concerns.
Simultaneous considering of high reliability and low energy consumption by DVS is a challenging problem in designing such a system, since using DVS has been shown to reduce the reliability profoundly. In this article, we have studied different schemes of standby-sparing systems from the energy consumption and reliability point of view. Moreover, we propose a new standby-sparing scheme which addresses both reliability and energy consumption jointly together. This scheme uses a simple energy management coupled with an online task scheduler which tries to dispatch those ready tasks which are expected to lead to high reliability and low energy consumption in the system. The effectiveness of the proposed scheme has been shown on TGFF under stochastic workloads. The results show 52\% improvement on energy saving compared to the conventional hot standby-sparing system. Moreover, two orders of magnitude higher reliability is obtained on average, while preserving the same level of energy saving as compared to the state-of-the-art low-energy standby-sparing system (LESS).", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "86", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Riemens:2014:TSA, author = "Danny P. Riemens and Georgi N. Gaydadjiev and Chris I. 
de Zeeuw and Christos Strydis", title = "Towards scalable arithmetic units with graceful degradation", journal = j-TECS, volume = "13", number = "4", pages = "87:1--87:??", month = feb, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2499367", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 11 18:33:06 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/fparith.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article presents a new family of scalable arithmetic units (ScAUs) targeting resource-constrained, embedded devices. We, first, study the performance, power, area and scalability properties of general adders. Next, suitable error-detection schemes for low-power embedded systems are discussed. As a result, our ScAUs are enhanced with a suitable error-detection scheme, resulting in a Parity-Checked ScAU (PCScAU) design. The PCScAU strikes a flexible trade-off between space and time redundancy, offering dependability similar to high-end techniques for the area and power cost of low-end approaches. An alternative design, the Precision-Scalable Arithmetic Unit (PScAU) maintains throughput with degraded precision in case of hardware failures. The PScAU is targeting dependable applications where latency rather than numerical accuracy is more important. The PScAU's downscaled mode is also interesting for runtime thermal management due to its advantageous power consumption. We implemented and synthesized the PCScAU, PScAU and a few important reference designs (double-, triple- and quadruple-modular-redundancy adders with/without input gating) in 90-nm UMC technology. Overall, the PCScAU ranks first in 9 out of 10 power-delay-area (PDA)-product variants. It exhibits 16\% area savings and 12\% performance speedup for 7\% increase in total power consumption, compared to the cheapest form of conventional hardware replication with the same fault coverage.
The PDA product of the PCScAU is, thus, reduced by 21\%. It is interesting that, while total power slightly increases, the PCScAU static power in fact decreases by 14\%. Therefore, for newer technology nodes where the static power component is significant, the PCScAU can also achieve --- next to performance and area --- significant power improvements.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "87", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Park:2014:AWL, author = "Sung Kyu Park and Min Kyu Maeng and Ki-Woong Park and Kyu Ho Park", title = "Adaptive wear-leveling algorithm for {PRAM} main memory with a {DRAM} buffer", journal = j-TECS, volume = "13", number = "4", pages = "88:1--88:??", month = feb, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2558427", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 11 18:33:06 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Phase Change RAM (PRAM) is a candidate to replace DRAM main memory due to its low idle power consumption and high scalability. However, its latency and endurance have generated problems in fulfilling its main memory role. The latency can be treated with a DRAM buffer, but the endurance problem remains, with three critical points that need to be improved despite the use of existing wear-leveling algorithms. First, existing DRAM buffering schemes do not consider write count distribution. Second, swapping and shifting operations are performed statically. Finally, swapping and shifting operations are loosely coupled with a DRAM buffer. As a remedy to these drawbacks, we propose an adaptive wear-leveling algorithm that consists of three novel schemes for PRAM main memory with a DRAM buffer.
The PRAM-aware DRAM buffering scheme reduces the write count and prevents skewed writing by considering the write count and clean data based on the least recently used (LRU) scheme. The adaptive multiple swapping and shifting scheme makes the write count even with the dynamic operation timing, the number of swapping pages being based on the workload pattern. Our DRAM buffer-aware swapping and shifting scheme reduces overhead by curbing additional swapping and shifting operations, thus reducing unnecessary write operations. To evaluate the wear-leveling effect, we have implemented a PIN-based wear-leveling simulator. The evaluation confirms that the PRAM lifetime increases from 0.68 years with the previous wear-leveling algorithm to 5.32 years with the adaptive wear-leveling algorithm.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "88", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Anjum:2014:TTA, author = "Omer Anjum and Mubashir Ali and Teemu Pitk{\"a}nen and Jari Nurmi", title = "Transport triggered architecture to perform carrier synchronization for {LTE}", journal = j-TECS, volume = "13", number = "4", pages = "89:1--89:??", month = feb, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2560036", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 11 18:33:06 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this article implementation of carrier frequency offset estimate for 20MHz LTE baseband processing is discussed. LTE (Long Term Evolution) is a wireless communication standard that makes use of some innovative techniques to gain very high data rates ({$>$100Mbps}). 
This goal for such a high throughput also imposes design challenges for the industry and academia such as in the case of handheld mobile devices where the power budget is very limited. Implicitly high throughput means we need more computation power and more energy. On the other hand industry is also struggling for a flexible hardware solution, or a software defined radio (SDR), to amortize the huge cost of required hardware changes as the wireless standards have kept evolving. Design innovations are now needed to confront those challenges of low power and flexible design without changing the hardware. The implementation is made on Transport Triggered Architecture (TTA), which is a unique concept in computer architecture design, based on the single instruction, ``MOVE''. The power consumption of the architecture when synthesized on 180nm technology at 180MHz and 1.8V is 18.39mW. The total area occupied excluding memory is 0.6mm$^2$. The proposed TTA solution has been compared with a more ASIC (application specific integrated circuits)-like ASIP (application specific instruction processor) solution and a coprocessor accelerator-based solution. The proposed solution is more flexible: easily programmable due to high level language support, easily scalable, and still efficient in energy consumption needed to complete the CFO (carrier frequency offset) estimation task. Because of these attractive characteristics, TTA is also a potential candidate for SDR platforms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "89", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Clemente:2014:AMR, author = "Juan Antonio Clemente and Javier Resano and Daniel Mozos", title = "An approach to manage reconfigurations and reduce area cost in hard real-time reconfigurable systems", journal = j-TECS, volume = "13", number = "4", pages = "90:1--90:??", month = feb, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2560037", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 11 18:33:06 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article presents a methodology for building real-time reconfigurable systems that ensures that all the temporal constraints of a set of applications are met while optimizing the utilization of the available reconfigurable resources. Starting from a static platform that meets all the real-time deadlines, our approach takes advantage of runtime reconfiguration in order to reduce the area needed while guaranteeing that all the deadlines are still met. This goal is achieved by identifying which tasks must be always ready for execution in order to meet the deadlines and by means of a methodology that also allows reducing the area requirements.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "90", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Dewan:2014:BAF, author = "Farhana Dewan and Nathan Fisher", title = "Bandwidth allocation for fixed-priority-scheduled compositional real-time systems", journal = j-TECS, volume = "13", number = "4", pages = "91:1--91:??", month = feb, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2560038", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 11 18:33:06 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Recent research in compositional real-time systems has focused on determination of a component's real-time interface parameters. An important objective in interface-parameter determination is minimizing the bandwidth allocated to each component of the system while simultaneously guaranteeing component schedulability. With this goal in mind, in this article, we explore fixed-priority schedulability in compositional setting. First we derive an efficient exact test based on iterative convergence for sporadic task systems scheduled by fixed-priority (e.g., deadline monotonic, rate monotonic) upon an explicit-deadline periodic (EDP) resource. Then we address the time complexity of the exact test by developing a fully-polynomial-time approximation scheme (FPTAS) for allocating bandwidth to components. Our parametric algorithm takes the task system and an accuracy parameter $ \epsilon > 0 $ as input and returns a bandwidth which is guaranteed to be at most a factor $ (1 + \epsilon) $ times the optimal minimum bandwidth required to successfully schedule the task system. 
We perform thorough simulation over synthetically generated task systems to compare the performance of our proposed efficient-exact and the approximate algorithm and observe a significant decrease in runtime and a very small relative error when comparing the approximate algorithm with the exact algorithm and the sufficient algorithm.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "91", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wu:2014:EIE, author = "I-Wei Wu and Jean Jyh-Jiun Shann and Wei-Chung Hsu and Chung-Ping Chung", title = "Extended Instruction Exploration for Multiple-Issue Architectures", journal = j-TECS, volume = "13", number = "4", pages = "92:1--92:??", month = feb, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2560039", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 11 18:33:06 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In order to satisfy the growing demand for high-performance computing in modern embedded devices, several architectural and microarchitectural enhancements have been implemented in processor architectures. Extended instruction (EI) is often used for architectural enhancement, while issuing multiple instructions is a common approach for microarchitectural enhancement. The impact of combining both of these approaches in the same design is not well understood. While previous studies have shown that EI can potentially improve performance in some applications on certain multiple-issue architectures, the algorithms used to identify EI for multiple-issue architectures yield only limited performance improvement. This is because not all arithmetic operations are suited for EI for multiple-issue architectures. 
To explore the full potential of EI for multiple-issue architectures, two important factors need to be considered: (1) the execution performance of an application is dominated by critical (located on the critical path) and highly resource-contentious (i.e., having a high probability of being delayed during execution due to hardware resource limitations) operations, and (2) an operation may become critical and/or highly resource contentious after some operations are added to the EI. This article presents an EI exploration algorithm for multiple-issue architectures that focuses on these two factors. Simulation results show that the proposed algorithm outperforms previously published algorithms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "92", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Moussalli:2014:SPX, author = "Roger Moussalli and Mariam Salloum and Robert Halstead and Walid Najjar and Vassilis J. Tsotras", title = "A study on parallelizing {XML} path filtering using accelerators", journal = j-TECS, volume = "13", number = "4", pages = "93:1--93:??", month = feb, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2560040", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 11 18:33:06 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Publish-subscribe systems present the state of the art in information dissemination to multiple users. Such systems have evolved from simple topic-based to the current XML-based systems. XML-based pub-sub systems provide users with more flexibility by allowing the formulation of complex queries on the content as well as the structure of the streaming messages. Messages that match a given user query are forwarded to the user. This article examines how to exploit the parallelism found in XPath filtering. 
Using an incoming XML stream, parsing and matching thousands of user profiles are performed simultaneously by matching engines. We show the benefits and trade-offs of mapping the proposed filtering approach onto FPGAs, processing streams of XML at wire speed, and GPUs, providing the flexibility of software. This is in contrast to conventional approaches bound by the sequential aspect of software computing, associated with a large memory footprint. By converting XPath expressions into custom stacks, our solution is the first to provide support for complex XPath structural constructs, such as parent-child and ancestor-descendant relations, whilst allowing wildcarding and recursion. The measured speedups resulting from the GPU and FPGA accelerations versus single-core CPUs are up to 6.6X and 2.5 orders of magnitude, respectively. The FPGA approaches are up to 31X faster than software running on 12 CPU cores.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "93", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Liu:2014:PRR, author = "Hengchang Liu and Pan Hui and Zhiheng Xie and Jingyuan Li and David Siu and Gang Zhou and Liusheng Huang and John A. Stankovic", title = "Providing reliable and real-time delivery in the presence of body shadowing in breadcrumb systems", journal = j-TECS, volume = "13", number = "4", pages = "94:1--94:??", month = feb, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2557633", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 11 18:33:06 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The primary goal of breadcrumb trail sensor networks is to transmit in real-time users' physiological parameters that measure life-critical functions to an incident commander through reliable multihop communication.
In applications using breadcrumb solutions, there are often many users working together, and this creates a well-known body shadowing effect (BSE). In this article, we first measure the characteristics of body shadowing for 2.4GHz sensor nodes. Our empirical results show that the body shadowing effect leads to severe packet loss and consequently very poor real-time performance. Then we develop a novel Intentional Forwarding solution. This solution accurately detects the shadowing mode and enables selected neighbors to forward data packets. Experimental results from a fully implemented testbed demonstrate that Intentional Forwarding is able to improve the end-to-end average packet delivery ratio (PDR) from 58\% to 93\% and worst-case PDR from 45\% to 85\%, and is able to meet soft real-time requirements even under severe body shadowing problems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "94", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Gal:2014:GLC, author = "Bertrand {Le Gal} and Christophe Jego", title = "{GPU-like} on-chip system for decoding {LDPC} codes", journal = j-TECS, volume = "13", number = "4", pages = "95:1--95:??", month = feb, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2538668", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 11 18:33:06 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Rapid prototyping is an important step in the development and the verification of computationally demanding tasks of digital communication systems, such as Forward Error Correction (FEC) decoding. The goal is to replace time-consuming simulations based on abstract models of the system with real-time experiments under real-world conditions. 
GPU-like architecture is a promising approach to fully exploit the potential of FPGA-based acceleration platforms. In this article, an application-specific GPU-like architecture and a complete compilation framework for decoding LDPC codes are proposed. The interest in an application-specific GPU in comparison with current GPUs is detailed. Finally, real-time experimentations demonstrate the potential of the GPU-like decoder to investigate both algorithmic and architectural issues.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "95", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Khan:2014:OLT, author = "Umair Ali Khan and Bernhard Rinner", title = "Online learning of timeout policies for dynamic power management", journal = j-TECS, volume = "13", number = "4", pages = "96:1--96:??", month = feb, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2529992", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 11 18:33:06 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Dynamic power management (DPM) refers to strategies which selectively change the operational states of a device during runtime to reduce the power consumption based on the past usage pattern, the current workload, and the given performance constraint. The power management problem becomes more challenging when the workload exhibits nonstationary behavior which may degrade the performance of any single or static DPM policy. This article presents a reinforcement learning (RL)-based DPM technique for optimal selection of timeout values in the different device states. Each timeout period determines how long the device will remain in a particular state before the transition decision is taken. 
The timeout selection is based on workload estimates derived from a Multilayer Artificial Neural Network (ML-ANN) and an objective function given by weighted performance and power parameters. Our DPM approach is further able to adapt the power-performance weights online to meet user-specified power and performance constraints, respectively. We have completely implemented our DPM algorithm on our embedded traffic surveillance platform and performed long-term experiments using real traffic data to demonstrate the effectiveness of the DPM. Our results show that the proposed learning algorithm not only adequately explores the power-performance trade-off with nonstationary workload but can also successfully perform online adjustment of the trade-off parameter in order to meet the user-specified constraint.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "96", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Gong:2014:SBF, author = "Lingkan Gong and Oliver Diessel", title = "Simulation-based functional verification of dynamically reconfigurable systems", journal = j-TECS, volume = "13", number = "4", pages = "97:1--97:??", month = feb, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2560042", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 11 18:33:06 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Dynamically reconfigurable systems (DRS) implemented using field-programmable gate arrays (FPGAs) allow hardware logic to be partially reconfigured while the rest of the design continues to operate. By mapping multiple reconfigurable hardware modules to the same physical region of an FPGA, such systems are able to time-multiplex their modules at runtime and adapt themselves to changing execution requirements. 
This architectural flexibility introduces challenges for verifying system functionality. New simulation approaches are required to extend traditional simulation techniques to assist designers in testing and debugging the time-varying behavior of DRS. This article summarizes our previous work on ReSim, the first tool to allow cycle-accurate yet physically independent simulation of a DRS reconfiguring both its logic and state. Furthermore, ReSim-based simulation does not require changing the design for simulation purposes and thereby verifies the implementation-ready design instead of a variation of the design. We discuss the conflicting requirements of simulation accuracy and verification productivity in verifying DRS designs and describe our approach to resolve this challenge. Through a range of case studies, we demonstrate that ReSim assists designers in detecting fabric-independent bugs of DRS designs and helps to achieve verification closure of DRS design projects.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "97", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Guimbretiere:2014:ADP, author = "Fran{\c{c}}ois Guimbreti{\`e}re and Shenwei Liu and Han Wang and Rajit Manohar", title = "An asymmetric dual-processor architecture for low-power information appliances", journal = j-TECS, volume = "13", number = "4", pages = "98:1--98:??", month = feb, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2560538", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Mar 11 18:33:06 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "As users become increasingly conscious of their energy footprint --- either to improve battery life or to respect the environment --- improved energy efficiency of systems has gained in importance.
This is especially important in the context of information appliances such as e-book readers that are meant to replace books, since their energy efficiency impacts how long the appliance can be used on a single charge of the battery. In this article, we present a new software and hardware architecture for information appliances that provides significant advantages in terms of device lifetime. The architecture combines a low-power microcontroller with a high-performance application processor, where the low-power microcontroller is used to handle simple user interactions (e.g., turning pages, inking, entering text) without waking up the main application processor. We demonstrate how this architecture is easily adapted to the traditional way of building user interfaces using a user interface markup language. We report on our initial measurements using an E Ink-based prototype. When comparing our hybrid architecture to a simpler solution we found that we can increase the battery life by a factor of 1.72 for a reading task and by a factor of 3.23 for a writing task. We conclude by presenting design guidelines aimed at optimizing the overall energy signature of information appliances.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "98", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Anonymous:2014:AOS, author = "Anonymous", title = "Abstracts: Online Supplements Volume 13, Number 1s Volume 13, Number 2s Volume 13, Number 3s Volume 13, Number 4s Volume 13, Number 5s", journal = j-TECS, volume = "13", number = "4", pages = "99:1--99:??", month = nov, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2688494.2688495", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Dec 5 18:52:55 MST 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "99", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Natale:2014:ESI, author = "Marco {Di Natale} and Rich West and Jian-Jia Chen and Rahul Mangharam", title = "Editorial: Special issue on real-time and embedded technology and applications", journal = j-TECS, volume = "13", number = "4s", pages = "119:1--119:??", month = apr, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2588608", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Apr 4 18:59:24 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "119", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Whitham:2014:ERC, author = "Jack Whitham and Neil C. Audsley and Robert I. 
Davis", title = "Explicit reservation of cache memory in a predictable, preemptive multitasking real-time system", journal = j-TECS, volume = "13", number = "4s", pages = "120:1--120:??", month = apr, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2523070", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Apr 4 18:59:24 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We describe and evaluate explicit reservation of cache memory to reduce the cache-related preemption delay (CRPD) observed when tasks share a cache in a preemptive multitasking hard real-time system. We demonstrate the approach using measurements obtained from a hardware prototype, and present schedulability analyses for systems that share a cache by explicit reservation. These analyses form the basis for a series of experiments to further evaluate the approach. We find that explicit reservation is most useful for larger task sets with high utilization. Some task sets cannot be scheduled with a conventional cache, but are schedulable with explicit reservation.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "120", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Nirjon:2014:MSR, author = "Shahriar Nirjon and Angela Nicoara and Cheng-Hsin Hsu and Jatinder Pal Singh and John A. 
Stankovic", title = "{MultiNets}: a system for real-time switching between multiple network interfaces on mobile devices", journal = j-TECS, volume = "13", number = "4s", pages = "121:1--121:??", month = apr, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2489788", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Apr 4 18:59:24 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "MultiNets is a system supporting seamless switch-over between wireless interfaces on mobile devices in real-time. MultiNets is configurable to run in three different modes: (i) Energy Saving mode --- for choosing the interface that saves the most energy based on the condition of the device, (ii) Offload mode --- for offloading data traffic from the cellular to WiFi network, and (iii) Performance mode --- for selecting the network for the fastest data connectivity. MultiNets also provides a powerful API that gives the application developers: (i) the choice to select a network interface to communicate with a specific server, and (ii) the ability to simultaneously transfer data over multiple network interfaces. MultiNets is modular, easily integrable, lightweight, and applicable to various mobile operating systems. We implement MultiNets on Android devices as a show case. MultiNets does not require any extra support from the network infrastructure and runs existing applications transparently. To evaluate MultiNets, we first collect data traces from 13 actual Android smartphone users over three months. We then use the collected traces to show that, by automatically switching to WiFi whenever it is available, MultiNets can offload on average 79.82\% of the data traffic. We also illustrate that, by optimally switching between the interfaces, MultiNets can save on average 21.14 KJ of energy per day, which is equivalent to 27.4\% of the daily energy usage.
Using our API, we demonstrate that a video streaming application achieves 43--271\% higher streaming rate when concurrently using WiFi and 3G interfaces. We deploy MultiNets in a real-world scenario and our experimental results show that depending on the user requirements, it outperforms the state-of-the-art Android system either by saving up to 33.75\% energy, achieving near-optimal offloading, or achieving near-optimal throughput while substantially reducing TCP interruptions due to switching.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "121", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kumar:2014:WCG, author = "Pratyush Kumar and Lothar Thiele", title = "Worst-case guarantees on a processor with temperature-based feedback control of speed", journal = j-TECS, volume = "13", number = "4s", pages = "122:1--122:??", month = apr, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2584611", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Apr 4 18:59:24 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "On-chip temperatures continue to rise, in spite of design efforts towards more efficient cooling and novel low-power technologies. Run-time thermal management techniques, such as speed scaling and system throttling, constitute a standard component in today's processors. One such technique is the feedback control of the processing speed based on the on-chip temperature. If suitably designed, such a controller can ensure that the temperature of the processor does not exceed a given bound, independent of the application. Such isolation of needs is encouraging. 
However, from the application's stand-point, such a processor must provide performance guarantees; in particular, the guarantee that real-time jobs do not have worst-case delays larger than their relative deadlines. For applications which exhibit variability, such as bursty arrival patterns, computing such guarantees is not apparent. As key enablers in such a computation, for the specific setting of First-Come-First-Serve (FCFS) scheduling, we (a) define and prove a monotonicity principle satisfied by the processor with the said controller, and (b) propose a thermally clipped processor model. We identify the worst-case trace simulating which on a suitably chosen thermally clipped processor provides the tight upper-bound on the worst-case delay. These results hold for general models of (a) the power consumption of the processor, (b) its thermal model, (c) the speed scaling law, and (d) the task model. For this modelling scope, we show that the same worst-case trace also leads to the worst-case temperature of the processor. This is useful to characterise tasks which do not load the processor sufficiently to hit the given peak temperature bound. We demonstrate the utility of this calculation by designing a shaper to delay the arrival times of jobs and thereby restrict the observed worst-case temperature while still meeting the task's deadlines.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "122", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Guan:2014:WAM, author = "Nan Guan and Mingsong Lv and Wang Yi and Ge Yu", title = "{WCET} analysis with {MRU} cache: Challenging {LRU} for predictability", journal = j-TECS, volume = "13", number = "4s", pages = "123:1--123:??", month = apr, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2584655", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Apr 4 18:59:24 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Most previous work on cache analysis for WCET estimation assumes a particular replacement policy called LRU. In contrast, much less work has been done for non-LRU policies, since they are generally considered to be very unpredictable. However, most commercial processors are actually equipped with these non-LRU policies, since they are more efficient in terms of hardware cost, power consumption and thermal output, while still maintaining almost as good average-case performance as LRU. In this work, we study the analysis of MRU, a non-LRU replacement policy employed in mainstream processor architectures like Intel Nehalem. Our work shows that the predictability of MRU has been significantly underestimated before, mainly because the existing cache analysis techniques and metrics do not match MRU well. As our main technical contribution, we propose a new cache hit/miss classification, $k$-Miss, to better capture the MRU behavior, and develop formal conditions and efficient techniques to decide $k$-Miss memory accesses. A remarkable feature of our analysis is that the $k$-Miss classifications under MRU are derived by the analysis result of the same program under LRU.
Therefore, our approach inherits the advantages in efficiency and precision of the state-of-the-art LRU analysis techniques based on abstract interpretation. Experiments with instruction caches show that our proposed MRU analysis has both good precision and high efficiency, and the obtained estimated WCET is rather close to (typically 1\% to 8\% more than) that obtained by the state-of-the-art LRU analysis, which indicates that MRU is also a good candidate for cache replacement policies in real-time systems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "123", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chattopadhyay:2014:UWA, author = "Sudipta Chattopadhyay and Lee Kee Chong and Abhik Roychoudhury and Timon Kelter and Peter Marwedel and Heiko Falk", title = "A Unified {WCET} analysis framework for multicore platforms", journal = j-TECS, volume = "13", number = "4s", pages = "124:1--124:??", month = apr, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2584654", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Apr 4 18:59:24 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "With the advent of multicore architectures, worst-case execution time (WCET) analysis has become an increasingly difficult problem. In this article, we propose a unified WCET analysis framework for multicore processors featuring both shared cache and shared bus. Compared to other previous works, our work differs by modeling the interaction of shared cache and shared bus with other basic microarchitectural components (e.g., pipeline and branch predictor). In addition, our framework does not assume a timing anomaly free multicore architecture for computing the WCET. 
A detailed experiment methodology suggests that we can obtain reasonably tight WCET estimates in a wide range of benchmark programs.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "124", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhu:2014:CCL, author = "Xiuming Zhu and Pei-Chi Huang and Jianyong Meng and Song Han and Aloysius K. Mok and Deji Chen and Mark Nixon", title = "{ColLoc}: a collaborative location and tracking system on {WirelessHART}", journal = j-TECS, volume = "13", number = "4s", pages = "125:1--125:??", month = apr, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2584656", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Apr 4 18:59:24 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Localization in wireless sensor networks is an important functionality that is required for tracking personnel and assets in industrial environments, especially for emergency response. Current commercial localization systems such as GPS suffer from the limitations of either high cost or low availability in many situations (e.g., indoor environments that exclude direct line-of-sight signal reception). The development of industrial wireless sensor networks such as WirelessHART provides an alternative. In this article, we present the design and implementation of ColLoc: a collaborative location and tracking system on WirelessHART as an industrially viable solution. This solution is built upon several technological advances. First, ColLoc adds the roaming functionality to WirelessHART and thus provides a means for keeping mobile WirelessHART devices connected to the network. 
Second, ColLoc employs a collaborative framework to integrate different types of distance measurements into the location estimation algorithm by weighing them according to their precision levels. ColLoc adopts several novel techniques to improve distance estimation accuracy and decreases the RSSI presurvey cost. These techniques include introducing distance error range constraints to the measurements, judiciously selecting the initial point in location estimation and online updating the signal propagation models in the anchor nodes, integrating Extended Kalman Filter (EKF) with trilateration to track moving objects. Our implementation of ColLoc can be applied to any WirelessHART-conforming network because no modification is needed on the WirelessHART field devices. We have implemented a complete ColLoc system to validate both the design and the effectiveness of our localization algorithm. Our experiments show that the mobile device never drops out of the WirelessHART network while moving around; with the help of even one dependable anchor, using RSSI can yield at least 75\% of distance errors below 5 meters, which is quite acceptable for many typical industrial automation applications.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "125", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Huang:2014:IEM, author = "Huang-Ming Huang and Christopher Gill and Chenyang Lu", title = "Implementation and evaluation of mixed-criticality scheduling approaches for sporadic tasks", journal = j-TECS, volume = "13", number = "4s", pages = "126:1--126:??", month = apr, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2584612", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Apr 4 18:59:24 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", note = "See corrections and comments \cite{Fleming:2017:CDI}.", abstract = "Traditional fixed-priority scheduling analysis for periodic and sporadic task sets is based on the assumption that all tasks are equally critical to the correct operation of the system. Therefore, every task has to be schedulable under the chosen scheduling policy, and estimates of tasks' worst-case execution times must be conservative in case a task runs longer than is usual. To address the significant underutilization of a system's resources under normal operating conditions that can arise from these assumptions, several mixed-criticality scheduling approaches have been proposed. However, to date, there have been few quantitative comparisons of system schedulability or runtime overhead for the different approaches. In this article, we present a side-by-side implementation and evaluation of the known mixed-criticality scheduling approaches, for periodic and sporadic mixed-criticality tasks on uniprocessor systems, under a mixed-criticality scheduling model that is common to all these approaches. To make a fair evaluation of mixed-criticality scheduling, we also address previously open issues and propose modifications to improve particular approaches. 
Our empirical evaluations demonstrate that user-space implementations of mechanisms to enforce different mixed-criticality scheduling approaches can be achieved atop Linux without kernel modification, with reasonably low (but in some cases nontrivial) overhead for mixed-criticality real-time task sets.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "126", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Pajic:2014:SCM, author = "Miroslav Pajic and Zhihao Jiang and Insup Lee and Oleg Sokolsky and Rahul Mangharam", title = "Safety-critical medical device development using the {UPP2SF} model translation tool", journal = j-TECS, volume = "13", number = "4s", pages = "127:1--127:??", month = apr, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2584651", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Apr 4 18:59:24 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Software-based control of life-critical embedded systems has become increasingly complex, and to a large extent has come to determine the safety of the human being. For example, implantable cardiac pacemakers have over 80,000 lines of code which are responsible for maintaining the heart within safe operating limits. As firmware-related recalls accounted for over 41\% of the 600,000 devices recalled in the last decade, there is a need for rigorous model-driven design tools to generate verified code from verified software models. To this effect, we have developed the UPP2SF model-translation tool, which facilitates automatic conversion of verified models (in UPPAAL) to models that may be simulated and tested (in Simulink/Stateflow). We describe the translation rules that ensure correct model conversion, applicable to a large class of models. 
We demonstrate how UPP2SF is used in the model-driven design of a pacemaker whose model is (a) designed and verified in UPPAAL (using timed automata), (b) automatically translated to Stateflow for simulation-based testing, and then (c) automatically generated into modular code for hardware-level integration testing of timing-related errors. In addition, we show how UPP2SF may be used for worst-case execution time estimation early in the design stage. Using UPP2SF, we demonstrate the value of integrated end-to-end modeling, verification, code-generation and testing process for complex software-controlled embedded systems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "127", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Saifullah:2014:NOR, author = "Abusayeed Saifullah and Chengjie Wu and Paras Babu Tiwari and You Xu and Yong Fu and Chenyang Lu and Yixin Chen", title = "Near optimal rate selection for wireless control systems", journal = j-TECS, volume = "13", number = "4s", pages = "128:1--128:??", month = apr, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2584652", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Apr 4 18:59:24 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "With the advent of industrial standards such as WirelessHART, process industries are now gravitating towards wireless control systems. Due to limited bandwidth in a wireless network shared by multiple control loops, it is critical to optimize the overall control performance. In this article, we address the scheduling-control co-design problem of determining the optimal sampling rates of feedback control loops sharing a WirelessHART network. The objective is to minimize the overall control cost while ensuring that all data flows meet their end-to-end deadlines. 
The resulting constrained optimization based on existing delay bounds for WirelessHART networks is challenging since it is nondifferentiable, nonlinear, and not in closed-form. We propose four methods to solve this problem. First, we present a subgradient method for rate selection. Second, we propose a greedy heuristic that usually achieves low control cost while significantly reducing the execution time. Third, we propose a global constrained optimization algorithm using a simulated annealing (SA) based penalty method. We study SA method under both constant factor penalty and adaptive penalty. Finally, we formulate rate selection as a differentiable convex optimization problem that provides a quick solution through a convex optimization technique. This is based on a new delay bound that is convex and differentiable, and hence simplifies the optimization problem. We study both the gradient descent method and the interior point method to solve it. We evaluate all methods through simulations based on topologies of a 74-node wireless sensor network testbed. The subgradient method is disposed to incur the longest execution time as well as the highest control cost among all methods. Among the SA-based constant penalty method, the greedy heuristic, and the gradient descent method, the first two represent the opposite ends of the tradeoff between control cost and execution time, while the third one hits the balance between the two. We further observe that the SA based adaptive penalty method is superior to the constant penalty method, and that the interior point method is superior to the gradient method. Thus, the interior point method and the SA-based adaptive penalty method are the two most effective approaches for rate selection. While both methods are competitive against each other in terms of control cost, the interior point method is significantly faster than the penalty method. 
As a result, the interior point method upon convex relaxation is more suitable for online rate adaptation than the SA based adaptive penalty method due to their significant difference in run-time efficiency.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "128", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hettiarachchi:2014:DAF, author = "Pradeep M. Hettiarachchi and Nathan Fisher and Masud Ahmed and Le Yi Wang and Shinan Wang and Weisong Shi", title = "A Design and Analysis Framework for Thermal-Resilient Hard Real-Time Systems", journal = j-TECS, volume = "13", number = "5s", pages = "146:1--146:??", month = sep, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2632154", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 16:07:59 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We address the challenge of designing predictable real-time systems in an unpredictable thermal environment where environmental temperature may dynamically change (e.g., implantable medical devices). Towards this challenge, we propose a control-theoretic design methodology that permits a system designer to specify a set of hard real-time performance modes under which the system may operate. The system automatically adjusts the real-time performance mode based on the external thermal stress. We show (via analysis, simulations, and a hardware testbed implementation) that our control design framework is stable and control performance is equivalent to previous real-time thermal approaches, even under dynamic temperature changes. A crucial and novel advantage of our framework over previous real-time control is the ability to guarantee hard deadlines even under transitions between modes. 
Furthermore, our system design permits the calculation of a new metric called thermal resiliency that characterizes the maximum external thermal stress that any hard real-time performance mode can withstand. Thus, our design framework and analysis may be classified as a thermal stress analysis for real-time systems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "146", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chattopadhyay:2014:CRP, author = "Sudipta Chattopadhyay and Abhik Roychoudhury", title = "Cache-Related Preemption Delay Analysis for Multilevel Noninclusive Caches", journal = j-TECS, volume = "13", number = "5s", pages = "147:1--147:??", month = sep, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2632156", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 16:07:59 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "With the rapid growth of complex hardware features, timing analysis has become an increasingly difficult problem. The key to solving this problem lies in the precise and scalable modeling of performance-enhancing processor features (e.g., cache). Moreover, real-time systems are often multitasking and use preemptive scheduling, with fixed or dynamic priority assignment. For such systems, cache related preemption delay (CRPD) may increase the execution time of a task. Therefore, CRPD may affect the overall schedulability analysis. Existing works propose to bound the value of CRPD in a single-level cache. In this article, we propose a CRPD analysis framework that can be used for a two-level, noninclusive cache hierarchy. In addition, our proposed framework is also applicable in the presence of shared caches. 
We first show that CRPD analysis faces several new challenges in the presence of a multilevel, noninclusive cache hierarchy. Our proposed framework overcomes all such challenges and we can formally prove the correctness of our framework. We have performed experiments with several subject programs, including an unmanned aerial vehicle (UAV) controller and an in-situ space debris monitoring instrument. Our experimental results suggest that we can provide sound and precise CRPD estimates using our framework.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "147", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Paul:2014:RTP, author = "Anand Paul", title = "Real-Time Power Management for Embedded {M2M} Using Intelligent Learning Methods", journal = j-TECS, volume = "13", number = "5s", pages = "148:1--148:??", month = sep, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2632158", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 16:07:59 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this work, an embedded system working model is designed with one server that receives requests by a requester by a service queue that is monitored by a Power Manager (PM). A novel approach is presented based on reinforcement learning to predict the best policy amidst existing DPM policies and deterministic Markovian nonstationary policies (DMNSP). We apply reinforcement learning, namely a computational approach to understanding and automating goal-directed learning that supports different devices according to their DPM. Reinforcement learning uses a formal framework defining the interaction between agent and environment in terms of states, response action, and reward points. 
The capability of this approach is demonstrated by an event-driven simulator designed using Java with a power-manageable machine-to-machine device. Our experiment result shows that the proposed dynamic power management with timeout policy gives average power saving from 4\% to 21\% and the novel dynamic power management with DMNSP gives average power saving from 10\% to 28\% more than already proposed DPM policies.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "148", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zeng:2014:MSC, author = "Haibo Zeng and Marco {Di Natale} and Qi Zhu", title = "Minimizing Stack and Communication Memory Usage in Real-Time Embedded Applications", journal = j-TECS, volume = "13", number = "5s", pages = "149:1--149:??", month = sep, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2632160", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 16:07:59 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In the development of real-time embedded applications, especially those on systems-on-chip, an efficient use of RAM memory is as important as the effective scheduling of the computation resources. The protection of communication and state variables accessed by concurrent tasks must provide real-time schedulability guarantees while using the least amount of memory. Several schemes, including preemption thresholds, have been developed to improve schedulability and save stack space by selectively disabling preemption. However, the design synthesis problem is still open. In this article, we target the assignment of the scheduling parameters to minimize memory usage for systems of practical interest, including designs compliant with automotive standards. 
We propose algorithms either proven optimal or shown to improve on randomized optimization methods like simulated annealing.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "149", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chakraborty:2014:MCH, author = "Arup Chakraborty and Houman Homayoun and Amin Khajeh and Nikil Dutt and Ahmed Eltawil and Fadi Kurdahi", title = "Multicopy Cache: a Highly Energy-Efficient Cache Architecture", journal = j-TECS, volume = "13", number = "5s", pages = "150:1--150:??", month = sep, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2632162", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 16:07:59 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Caches are known to consume a large part of total microprocessor energy. Traditionally, voltage scaling has been used to reduce both dynamic and leakage power in caches. However, aggressive voltage reduction causes process-variation-induced failures in cache SRAM arrays, thus compromising cache reliability. We present MultiCopy Cache (MC$^2$), a new cache architecture that achieves significant reduction in energy consumption through aggressive voltage scaling while maintaining high error resilience (reliability) by exploiting multiple copies of each data item in the cache. Unlike many previous approaches, MC$^2$ does not require any error map characterization and therefore is responsive to changing operating conditions (e.g., Vdd noise, temperature, and leakage) of the cache. MC$^2$ also incurs significantly lower overheads compared to other ECC-based caches. 
Our experimental results on embedded benchmarks demonstrate that MC$^2$ achieves up to 60\% reduction in energy and energy-delay product (EDP) with only 3.5\% reduction in IPC and no appreciable area overhead.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "150", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hanumaiah:2014:SST, author = "Vinay Hanumaiah and Digant Desai and Benjamin Gaudette and Carole-Jean Wu and Sarma Vrudhula", title = "{STEAM}: a Smart Temperature and Energy Aware Multicore Controller", journal = j-TECS, volume = "13", number = "5s", pages = "151:1--151:??", month = sep, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2661430", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 16:07:59 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Recent empirical studies have shown that multicore scaling is fast becoming power limited, and consequently, an increasing fraction of a multicore processor has to be under clocked or powered off. Therefore, in addition to fundamental innovations in architecture, compilers and parallelization of application programs, there is a need to develop practical and effective dynamic energy management (DEM) techniques for multicore processors. Existing DEM techniques mainly target reducing processor power consumption and temperature, and only few of them have addressed improving energy efficiency for multicore systems. With energy efficiency taking a center stage in all aspects of computing, the focus of the DEM needs to be on finding practical methods to maximize processor efficiency. Towards this, this article presents STEAM --- an optimal closed-loop DEM controller designed for multicore processors. The objective is to maximize energy efficiency by dynamic voltage and frequency scaling (DVFS). 
Energy efficiency is defined as the ratio of performance to power consumption or performance-per-watt (PPW). This is the same as the number of instructions executed per Joule. The PPW metric is actually replaced by $ P^\alpha $ PW (performance$^\alpha $-per-Watt), which allows for controlling the importance of performance versus power consumption by varying $ \alpha $. The proposed controller was implemented on a Linux system and tested with the Intel Sandy Bridge processor. There are three power management schemes called governors, available with Intel platforms. They are referred to as (1) Powersave (lowest power consumption), (2) Performance (achieves highest performance), and (3) Ondemand. Our simple and lightweight controller when executing SPEC CPU2006, PARSEC, and MiBench benchmarks have achieved an average of 18\% improvement in energy efficiency (MIPS/Watt) over these ACPI policies. Moreover, STEAM also demonstrated an excellent prediction of core temperatures and power consumption, and the ability to control the core temperatures within $ 3^\circ $C of the specified maximum. Finally, the overhead of the STEAM implementation (in terms of CPU resources) is less than 0.25\%. The entire implementation is self-contained and can be installed on any processor with very little prior knowledge of the processor.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "151", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Rossebo:2014:ISI, author = "Judith E. Y. Rosseb{\o} and Siv Hilde Houmb and Geri Georg and Virginia N. L. 
Franqueira and Dimitrios Serpanos", title = "Introduction to Special Issue on Risk and Trust in Embedded Critical Systems", journal = j-TECS, volume = "13", number = "5s", pages = "152:1--152:??", month = sep, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2659008", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 16:07:59 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "152", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Dunbar:2014:DTE, author = "Carson Dunbar and Gang Qu", title = "Designing Trusted Embedded Systems from Finite State Machines", journal = j-TECS, volume = "13", number = "5s", pages = "153:1--153:??", month = sep, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2638555", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 16:07:59 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Sequential components are crucial for a real-time embedded system as they control the system based on the system's current state and real life input. In this article, we explore the security and trust issues of sequential system design from the perspective of a finite state machine (FSM), which is the most popular model used to describe sequential systems. Specifically, we find that the traditional FSM synthesis procedure will introduce security risks and cannot guarantee trustworthiness in the implemented circuits. Indeed, we show that not only do there exist simple and effective ways to attack a sequential system, it is also possible to insert a hardware Trojan Horse into the design without introducing any significant design overhead. 
We then formally define the notion of trust in FSM and propose a novel approach to designing trusted circuits from the FSM specification. We demonstrate both our findings on the security threats and the effectiveness of our proposed method on Microelectronics Center of North Carolina (MCNC) sequential circuit benchmarks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "153", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Dua:2014:CSS, author = "Akshay Dua and Nirupama Bulusu and Wu-Chang Feng and Wen Hu", title = "Combating Software and {Sybil} Attacks to Data Integrity in Crowd-Sourced Embedded Systems", journal = j-TECS, volume = "13", number = "5s", pages = "154:1--154:??", month = sep, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2629338", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 16:07:59 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Crowd-sourced mobile embedded systems allow people to contribute sensor data, for critical applications, including transportation, emergency response and eHealth. Data integrity becomes imperative as malicious participants can launch software and Sybil attacks modifying the sensing platform and data. To address these attacks, we develop (1) a Trusted Sensing Peripheral (TSP) enabling collection of high-integrity raw or aggregated data, and participation in applications requiring additional modalities; and (2) a Secure Tasking and Aggregation Protocol (STAP) enabling aggregation of TSP trusted readings by untrusted intermediaries, while efficiently detecting fabricators. Evaluations demonstrate that TSP and STAP are practical and energy-efficient.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "154", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chang:2014:ISI, author = "Li-Pin Chang and Tei-Wei Kuo and Chris Gill and Jin Nakazawa", title = "Introduction to the Special Issue on Real-Time, Embedded and Cyber-Physical Systems", journal = j-TECS, volume = "13", number = "5s", pages = "155:1--155:??", month = sep, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2660488", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 16:07:59 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "155", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Qiu:2014:BPD, author = "Keni Qiu and Mengying Zhao and Chun Jason Xue and Alex Orailoglu", title = "Branch Prediction-Directed Dynamic Instruction Cache Locking for Embedded Systems", journal = j-TECS, volume = "13", number = "5s", pages = "156:1--156:??", month = sep, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2660492", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 16:07:59 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Cache locking is a cache management technique to preclude the replacement of locked cache contents. Cache locking is often adopted to improve cache access predictability in Worst-Case Execution Time (WCET) analysis. Static cache locking methods have been proposed recently to improve Average-Case Execution Time (ACET) performance. This article presents an approach, Branch Prediction-directed Dynamic Cache Locking (BPDCL), to improve system performance through cache conflict miss reduction. 
In the proposed approach, the control flow graph of a program is first partitioned into disjoint execution regions, then memory blocks worth locking are determined by calculating the locking profit for each region. These two steps are conducted during compilation time. At runtime, directed by branch predictions, locking routines are prefetched into a small high-speed buffer. The predetermined cache locking contents are loaded and locked at specific execution points during program execution. Experimental results show that the proposed BPDCL method exhibits an average improvement of 25.9\%, 13.8\%, and 8.0\% on cache miss rate reduction in comparison to cases with no cache locking, the static locking method, and the dynamic locking method, respectively.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "156", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kang:2014:HSA, author = "Chih-Kai Kang and Yu-Jhang Cai and Chin-Hsien Wu and Pi-Cheng Hsiu", title = "A Hybrid Storage Access Framework for High-Performance Virtual Machines", journal = j-TECS, volume = "13", number = "5s", pages = "157:1--157:??", month = sep, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2660493", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 16:07:59 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "In recent years, advances in virtualization technology have enabled multiple virtual machines to run on a physical machine, such that each virtual machine can perform independently with its own operating system. 
The IT industry has adopted virtualization technology because of its ability to improve hardware resource utilization, achieve low-power consumption, support concurrent applications, simplify device management, and reduce maintenance costs. However, because of the hardware limitation of storage devices, the I/O capacity could cause performance bottlenecks. To address the problem, we propose a hybrid storage access framework that exploits solid-state drives (SSDs) to improve the I/O performance in a virtualization environment.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "157", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Pagani:2014:EEA, author = "Santiago Pagani and Jian-Jia Chen", title = "Energy Efficiency Analysis for the Single Frequency Approximation {(SFA)} Scheme", journal = j-TECS, volume = "13", number = "5s", pages = "158:1--158:??", month = sep, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2660490", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 16:07:59 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Energy-efficient designs are important issues in computing systems. This article studies the energy efficiency of a simple and linear-time strategy, called the Single Frequency Approximation (SFA) scheme, for periodic real-time tasks on multicore systems with a shared supply voltage in a voltage island. The strategy executes all the cores at a single frequency to just meet the timing constraints. SFA has been adopted in the literature after task partitioning, but the worst-case performance of SFA in terms of energy consumption incurred is an open problem. We provide comprehensive analysis for SFA to derive the cycle utilization distribution for its worst-case behaviour for energy minimization. 
Our analysis shows that the energy consumption incurred by using SFA for task execution is at most 1.53 (1.74, 2.10, 2.69, respectively), compared to the energy consumption of the optimal voltage/frequency scaling, when the dynamic power consumption is a cubic function of the frequency and the voltage island has up to 4 (8, 16, 32, respectively) cores. The analysis shows that SFA is indeed an effective scheme under practical settings, even though it is not optimal. Furthermore, since all the cores run at a single frequency and no frequency alignment for Dynamic Voltage and Frequency Scaling (DVFS) between cores is needed, any unicore dynamic power management technique for reducing the energy consumption for idling can be easily incorporated individually on each core in the voltage island. This article also provides an analysis of energy consumption for SFA combined with procrastination for Dynamic Power Management (DPM), resulting in an increment of 1 from the previous results for task execution. Furthermore, we also extend our analysis for deriving the approximation factor of SFA for a multicore system with multiple voltage islands.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "158", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Raravi:2014:TAA, author = "Gurulingesh Raravi and Vincent N{\'e}lis", title = "Task Assignment Algorithms for Heterogeneous Multiprocessors", journal = j-TECS, volume = "13", number = "5s", pages = "159:1--159:??", month = sep, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2660494", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 16:07:59 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Consider the problem of assigning implicit-deadline sporadic tasks on a heterogeneous multiprocessor platform comprising a constant number (denoted by $t$) of distinct types of processors---such a platform is referred to as a $t$-type platform. We present two algorithms, LPG$_{IM}$ and LPG$_{NM}$, each providing the following guarantee. For a given $t$-type platform and a task set, if there exists a task assignment such that tasks can be scheduled to meet their deadlines by allowing them to migrate only between processors of the same type (intra-migrative), then: (i) LPG$_{IM}$ succeeds in finding such an assignment where the same restriction on task migration applies (intra-migrative) but given a platform in which only one processor of each type is $ 1 + \alpha \times (t - 1) / t $ times faster and (ii) LPG$_{NM}$ succeeds in finding a task assignment where tasks are not allowed to migrate between processors (non-migrative) but given a platform in which every processor is $ 1 + \alpha $ times faster. The parameter $ \alpha $ is a property of the task set; it is the maximum of all the task utilizations that are no greater than one. 
To the best of our knowledge, for $t$-type heterogeneous multiprocessors: (i) for the problem of intra-migrative task assignment, no previous algorithm exists with a proven bound and hence our algorithm, LPG$_{IM}$, is the first of its kind and (ii) for the problem of non-migrative task assignment, our algorithm, LPG$_{NM}$, has superior performance compared to state-of-the-art.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "159", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Andersson:2014:PGT, author = "Bj{\"o}rn Andersson and Gurulingesh Raravi", title = "Provably Good Task Assignment for Two-Type Heterogeneous Multiprocessors Using Cutting Planes", journal = j-TECS, volume = "13", number = "5s", pages = "160:1--160:??", month = sep, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2660495", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 16:07:59 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Consider scheduling of real-time tasks on a multiprocessor where migration is forbidden. Specifically, consider the problem of determining a task-to-processor assignment for a given collection of implicit-deadline sporadic tasks upon a multiprocessor platform in which there are two distinct types of processors. For this problem, we propose a new algorithm, LPC (task assignment based on solving a Linear Program with Cutting planes). The algorithm offers the following guarantee: for a given task set and a platform, if there exists a feasible task-to-processor assignment, then LPC succeeds in finding such a feasible task-to-processor assignment as well but on a platform in which each processor is $ 1.5 \times $ faster and has three additional processors. 
For systems with a large number of processors, LPC has a better approximation ratio than state-of-the-art algorithms. To the best of our knowledge, this is the first work that develops a provably good real-time task assignment algorithm using cutting planes.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "160", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mancuso:2014:OPA, author = "Giulio M. Mancuso and Enrico Bini and Gabriele Pannocchia", title = "Optimal Priority Assignment to Control Tasks", journal = j-TECS, volume = "13", number = "5s", pages = "161:1--161:??", month = sep, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2660496", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 16:07:59 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In embedded real-time systems, task priorities are often assigned to meet deadlines. However, in control tasks, a late completion of a task has no catastrophic consequence; rather, it has a quantifiable impact in the control performance achieved by the task. In this article, we address the problem of determining the optimal assignment of priorities and periods of sampled-data control tasks that run over a shared computation unit. We show that the minimization of the overall cost can be performed efficiently using a branch and bound algorithm that can be further speeded up by allowing for a small degree of suboptimality. Detailed numerical simulations are presented to show the advantages of various branching alternatives, the overall algorithm effectiveness, and its scalability with the number of tasks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "161", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{DeNiz:2014:UBR, author = "Dionisio {De Niz} and Lutz Wrage and Anthony Rowe and Ragunathan (Raj) Rajkumar", title = "Utility-Based Resource Overbooking for Cyber-Physical Systems", journal = j-TECS, volume = "13", number = "5s", pages = "162:1--162:??", month = sep, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2660497", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 16:07:59 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Traditional hard real-time scheduling algorithms require the use of the worst-case execution times to guarantee that deadlines will be met. Unfortunately, many algorithms with parameters derived from sensing the physical world suffer large variations in execution time, leading to pessimistic overall utilization, such as visual recognition tasks. In this article, we present ZS-QRAM, a scheduling approach that enables the use of flexible execution times and application-derived utility to tasks in order to maximize total system utility. In particular, we provide a detailed description of the algorithm, the formal proofs for its temporal protection, and a detailed evaluation. Our evaluation uses the Utility Degradation Resilience (UDR) showing that ZS-QRAM is able to obtain $ 4 \times $ as much UDR as ZSRM, a previous overbooking approach, and almost $ 2 \times $ as much UDR as Rate-Monotonic with Period Transformation (RM/TP). We then evaluate a Linux kernel module implementation of our scheduler on an Unmanned Air Vehicle (UAV) platform. We show that, by using our approach, we are able to keep the tasks that render the most utility by degrading lower-utility ones even in the presence of highly dynamic execution times.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. 
Comput. Syst.", articleno = "162", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Liu:2014:STD, author = "Kai Liu and Victor C. S. Lee and Joseph K. Y. Ng and Sang H. Son and Edwin H.-M. Sha", title = "Scheduling Temporal Data with Dynamic Snapshot Consistency Requirement in Vehicular Cyber-Physical Systems", journal = j-TECS, volume = "13", number = "5s", pages = "163:1--163:??", month = sep, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2629546", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 16:07:59 MDT 2014", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Timely and efficient data dissemination is one of the fundamental requirements to enable innovative applications in vehicular cyber-physical systems (VCPS). In this work, we intensively analyze the characteristics of temporal data dissemination in VCPS. On this basis, we formulate the static and dynamic snapshot consistency requirements on serving real-time requests for temporal data items. Two online algorithms are proposed to enhance the system performance with different requirements. In particular, a reschedule mechanism is developed to make the scheduling adaptable to the dynamic snapshot consistency requirement. A comprehensive performance evaluation demonstrates the superiority of the proposed algorithms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "163", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Goehringer:2014:ISI, author = "Diana Goehringer", title = "Introduction to the {Special Issue on Virtual Prototyping of Parallel and Embedded Systems (ViPES)}", journal = j-TECS, volume = "13", number = "5s", pages = "164:1--164:??", month = nov, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2675739", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jan 7 15:03:31 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "164", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Schumacher:2014:LLS, author = "Christoph Schumacher and Jan Henrik Weinstock and Rainer Leupers and Gerd Ascheid and Laura Tosoratto and Alessandro Lonardo and Dietmar Petras and Andreas Hoffmann", title = "{legaSCi}: Legacy {SystemC} Model Integration into Parallel Simulators", journal = j-TECS, volume = "13", number = "5s", pages = "165:1--165:??", month = nov, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2678018", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jan 7 15:03:31 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Architects and developers use virtual prototypes of computer systems to receive early feedback on hardware design decisions as well as to develop and debug system software. This is facilitated by the comprehensive inspection capabilities virtual prototypes offer. For virtual prototypes, execution speed is crucial to support the users' productivity. 
Parallel simulation techniques are employed to offset the speed impact of the increasing number of cores that need to be simulated in virtual prototypes of parallel and embedded systems. SystemC is the de facto industry standard library for virtual platform modeling. Since currently no parallel SystemC library is commonly available, typical SystemC models are coded for execution in sequential simulation environments. Simply putting such models into parallel simulators may lead to thread-safety issues and may additionally cause nondeterministic simulator behavior. This article proposes a methodology to support simulation creators to face the challenge of integrating such legacy models into parallel SystemC environments. The feasibility of the proposed method is evaluated by parallelizing the latest instance of the EU FP7 project EURETILE embedded platform simulator. Using legaSCi, on four host processor cores a speedup of 2.13$ \times $ is demonstrated, without having to change the individual models of the simulator.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "165", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Razaghi:2014:HCM, author = "Parisa Razaghi and Andreas Gerstlauer", title = "Host-Compiled Multicore System Simulation for Early Real-Time Performance Evaluation", journal = j-TECS, volume = "13", number = "5s", pages = "166:1--166:??", month = nov, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2678020", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jan 7 15:03:31 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "With increasing complexity and software content, modern embedded platforms employ a heterogeneous mix of multicore processors along with hardware accelerators in order to provide high performance in limited power budgets. To evaluate real-time performance and other constraints, full system simulations are essential. With traditional approaches being either slow or inaccurate, so-called source-level or host-compiled simulators have recently emerged as a solution for rapid evaluation of the complete system at early design stages. In such approaches, a faster simulation is achieved by abstracting execution behavior and increasing simulation granularity. However, existing source-level simulators often focus on application behavior only while neglecting the effects of hardware/software interactions and their associated speed and accuracy trade-offs. In this article, we present a host-compiled simulator that emulates software execution in a full-system context. Our simulator incorporates abstract models of both real-time operating systems (RTOSs) and multicore processors to replicate timing-accurate hardware/software interactions and to enable full system cosimulation. 
An integrated approach for automatic timing granularity adjustment (ATGA) uses observations of the system state to automatically control the timing model and optimally navigate speed versus accuracy conditions. Results as applied to industrial-strength platforms confirm that OS- and system-level effects can significantly contribute to overall accuracy and simulation overhead. By providing careful abstractions, our models can achieve full system simulations at equivalent speeds of more than a thousand MIPS with less than 3\% timing error. Coupled with the capability to easily adjust simulation parameters and configurations, this demonstrates the benefits of our simulator for early application development and design space exploration.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "166", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mihajlovic:2014:DIQ, author = "Bojan Mihajlovi{\'c} and Zeljko Zili{\'c} and Warren J. Gross", title = "Dynamically Instrumenting the {QEMU} Emulator for {Linux} Process Trace Generation with the {GDB} Debugger", journal = j-TECS, volume = "13", number = "5s", pages = "167:1--167:??", month = nov, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2678022", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jan 7 15:03:31 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/gnu.bib; https://www.math.utah.edu/pub/tex/bib/linux.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib; https://www.math.utah.edu/pub/tex/bib/unix.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "In software debugging, trace generation techniques are used to resolve highly complex bugs. However, the emulators increasingly used for embedded software development do not yet offer the types of trace generation infrastructure available in hardware. 
In this article, we make changes to the ARM ISA emulation of the QEMU emulator to allow for continuous instruction-level trace generation. Using a standard GDB client, tracepoints can be inserted to dynamically log registers and memory addresses without altering executing code. The ability to run trace experiments in five different modes allows the scope of trace generation to be narrowed as needed, down to the level of a single Linux process. Our scheme collects the execution traces of a Linux process on average between 9.6x--0.7x the speed of existing QEMU trace capabilities, with 96.7\% less trace data volume. Compared to a software-instrumented tracing scheme, our method is both unobtrusive and performs on average between 3--4 orders of magnitude faster.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "167", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Diamantopoulos:2014:PFS, author = "Dionysios Diamantopoulos and Efstathios Sotiriou-Xanthopoulos and Kostas Siozios and George Economakos and Dimitrios Soudris", title = "{Plug\&Chip}: a Framework for Supporting Rapid Prototyping of {$3$D} Hybrid Virtual {SoCs}", journal = j-TECS, volume = "13", number = "5s", pages = "168:1--168:??", month = nov, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2661634", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jan 7 15:03:31 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In the embedded system domain there is a continuous demand towards providing higher flexibility for application development. This trend strives for virtual prototyping solutions capable of performing fast system simulation. 
Among other benefits, such a solution supports concurrent hardware/software system design by enabling to start developing, testing, and validating the embedded software substantially earlier than has been possible in the past. Towards this direction, throughout this article we introduce a new framework, named Plug\&Chip, targeting to support rapid prototyping of 2D and 3D digital systems. In contrast to other relevant approaches, our solution provides higher flexibility by enabling incremental system design, while also handling platforms developed with the usage of 3D integration technology.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "168", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Siozios:2014:FSA, author = "Kostas Siozios and Dimitrios Soudris and Michael H{\"u}bner", title = "A Framework for Supporting Adaptive Fault-Tolerant Solutions", journal = j-TECS, volume = "13", number = "5s", pages = "169:1--169:??", month = nov, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2629473", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jan 7 15:03:31 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "For decades, computer architects pursued one primary goal: performance. The ever-faster transistors provided by Moore's law were translated into remarkable gains in operation frequency and power consumption. However, the device-level size and architecture complexity impose several new challenges, including a decrease in dependability level due to physical failures. In this article we propose a software-supported methodology based on game theory for adapting the aggressiveness of fault tolerance at runtime. 
Experimental results prove the efficiency of our solution since it achieves comparable fault masking to relevant solutions, but with significantly lower mitigation cost. More specifically, our framework speeds up the identification of suspicious failure resources on average by 76\% as compared to the HotSpot tool. Similarly, the introduced solution leads to average Power$ \times $Delay (PDP) savings against an existing TMR approach by 53\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "169", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Shukla:2015:ERS, author = "Sandeep K. Shukla", title = "Editorial: Regular, Special, and Related Issues", journal = j-TECS, volume = "14", number = "1", pages = "1:1--1:??", month = jan, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2698230", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 22 06:25:23 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "1", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Bishnoi:2015:BCC, author = "Rimpy Bishnoi and Vijay Laxmi and Manoj Singh Gaur and Jos{\'e} Flich and Francisco Trivi{\~n}o", title = "A Brief Comment on {``A Complete Self-Testing and Self-Configuring NoC Infrastructure for Cost-Effective MPSoCs'' [ACM Transactions on Embedded Computing Systems {\bf 12} (2013) Article 106]}", journal = j-TECS, volume = "14", number = "1", pages = "2:1--2:??", month = jan, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2668121", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 22 06:25:23 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", note = "See \cite{Ghiribaldi:2013:CST}.", abstract = "In the Ghiribaldi et al. [2013] paper, a complete self-testing and self-configuring NoC infrastructure for cost-effective MPSoCs was presented in order to make NoC architecture tolerant to faults. To overcome the complexity involved during the complete reconfiguration of routing instances in the face of most of the usual failure patterns, Ghiribaldi et al. [2013] proposed a fast self-reconfiguration algorithm. The algorithm is based on segment-based routing implemented using Logic-Based Distributed Routing (LBDR) and claimed to have handled the most common NoC faults. The purpose of this comment is to demonstrate the inconsistency of the fast self-configuration method presented in Ghiribaldi et al. [2013]. To handle inconsistency, we present the correct set of LBDR bits and also argue that complete reconfiguration of the routing instance is mandatory to handle some fault combinations. New coverage results of the fast self-reconfiguration algorithm of Ghiribaldi et al. [2013] are also presented.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "2", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Munir:2015:MAF, author = "Arslan Munir and Joseph Antoon and Ann Gordon-Ross", title = "Modeling and Analysis of Fault Detection and Fault Tolerance in Wireless Sensor Networks", journal = j-TECS, volume = "14", number = "1", pages = "3:1--3:??", month = jan, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2680538", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 22 06:25:23 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Technological advancements in communications and embedded systems have led to the proliferation of Wireless Sensor Networks (WSNs) in a wide variety of application domains. These application domains include but are not limited to mission-critical (e.g., security, defense, space, satellite) or safety-related (e.g., health care, active volcano monitoring) systems. One commonality across all WSN application domains is the need to meet application requirements (e.g., lifetime, reliability). Many application domains require that sensor nodes be deployed in harsh environments, such as on the ocean floor or in an active volcano, making these nodes more prone to failures. Sensor node failures can be catastrophic for critical or safety-related systems. This article models and analyzes fault detection and fault tolerance in WSNs. To determine the effectiveness and accuracy of fault detection algorithms, we simulate these algorithms using ns-2. We investigate the synergy between fault detection and fault tolerance and use the fault detection algorithms' accuracies in our modeling of Fault-Tolerant (FT) WSNs. We develop Markov models for characterizing WSN reliability and Mean Time to Failure (MTTF) to facilitate WSN application-specific design. 
Results obtained from our FT modeling reveal that an FT WSN composed of duplex sensor nodes can result in as high as a 100\% MTTF increase and approximately a 350\% improvement in reliability over a Non-Fault-Tolerant (NFT) WSN. The article also highlights future research directions for the design and deployment of reliable and trustworthy WSNs.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "3", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Sarkar:2015:STP, author = "Abhik Sarkar and Frank Mueller and Harini Ramaprasad", title = "Static Task Partitioning for Locked Caches in Multicore Real-Time Systems", journal = j-TECS, volume = "14", number = "1", pages = "4:1--4:??", month = jan, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2638557", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 22 06:25:23 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Growing processing demand on multitasking real-time systems can be met by employing scalable multicore architectures. For such environments, locking cache lines for hard real-time systems ensures timing predictability of data references and may lower worst-case execution time. This work studies the benefits of cache locking on massive multicore architectures with private caches in the context of hard real-time systems. In shared cache architectures, the cache is a single resource shared among all of the tasks. However, in scalable cache architectures with private caches, conflicts exist only among the tasks scheduled on one core. This calls for a cache-aware allocation of tasks onto cores. The objective of this work is to increase the predictability of memory accesses resolved by caches while reducing the number of cores for a given task set. 
This allows designers to reduce the footprint of their subsystem of real-time tasks and thereby cost, either by choosing a product with fewer cores as a target or to allow more subsystems to be co-located on a given fixed number of cores. Our work proposes a novel variant of the cache-unaware First Fit Decreasing (FFD) algorithm called Naive locked First Fit Decreasing (NFFD) policy. We propose two cache-aware static scheduling schemes: (a) Greedy First Fit Decreasing (GFFD) and (b) Colored First Fit Decreasing (CoFFD) for task sets where tasks do not have intratask conflicts among locked regions (Scenario A). NFFD is capable of scheduling high utilization task sets that FFD cannot schedule. Experiments also show that CoFFD consistently outperforms GFFD, resulting in a lower number of cores and lower system utilization. CoFFD reduces the number of core requirements by 30\% to 60\% compared to NFFD. For a more generic case where tasks have intratask conflicts, we split the task partitioning between two phases: task selection and task allocation (Scenario B). Instead of resolving conflicts at a global level, these algorithms resolve conflicts among regions while allocating a task onto a core and unlocking at region level instead of task level. We show that a combination of dynamic ordering (task selection) with Chaitin's Coloring (task allocation) scheme reduces the number of cores required by up to 22\% over a basic scheme (in a combination of monotone ordering and regional FFD). Regional unlocking allows this scheme to outperform CoFFD for medium utilization task sets from Scenario A. However, CoFFD performs better than any other scheme for high utilization task sets from Scenario A. Overall, this work is unique in considering the challenges of future multicore architectures for real-time systems and provides key insights into task partitioning and cache-locking mechanisms for architectures with private caches.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Embed. Comput. Syst.", articleno = "4", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Tillenius:2015:RAT, author = "Martin Tillenius and Elisabeth Larsson and Rosa M. Badia and Xavier Martorell", title = "Resource-Aware Task Scheduling", journal = j-TECS, volume = "14", number = "1", pages = "5:1--5:??", month = jan, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2638554", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 22 06:25:23 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Dependency-aware task-based parallel programming models have proven to be successful for developing efficient application software for multicore-based computer architectures. The programming model is amenable to programmers, thereby supporting productivity, whereas hardware performance is achieved through a runtime system that dynamically schedules tasks onto cores in such a way that all dependencies are respected. However, even if the scheduling is completely successful with respect to load balancing, the scaling with the number of cores may be suboptimal due to resource contention. Here we consider the problem of scheduling tasks not only with respect to their interdependencies but also with respect to their usage of resources, such as memory and bandwidth. At the software level, this is achieved by user annotations of the task resource consumption. In the runtime system, the annotations are translated into scheduling constraints. Experimental results for different hardware, demonstrating performance gains both for model examples and real applications, are presented. Furthermore, we provide a set of tools to detect resource sensitivity and predict the performance improvements that can be achieved by resource-aware scheduling. 
These tools are solely based on parallel execution traces and require no instrumentation or modification of the application code.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "5", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Huang:2015:JWU, author = "Yazhi Huang and Mengying Zhao and Chun Jason Xue", title = "Joint {WCET} and Update Activity Minimization for Cyber-Physical Systems", journal = j-TECS, volume = "14", number = "1", pages = "6:1--6:??", month = jan, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2680539", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 22 06:25:23 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "A cyber-physical system (CPS) is a desirable computing platform for many industrial and scientific applications, such as industrial process monitoring, environmental monitoring, chemical processes, and battlefield surveillance. The application of CPSs has two challenges: First, CPSs often include a number of sensor nodes. Update of preloaded code on remote sensor nodes powered by batteries is extremely energy consuming. The code update issue in the energy-sensitive CPS must be carefully considered. Second, CPSs are often real-time embedded systems with real-time properties. Worst-case execution time (WCET) is one of the most important metrics in real-time system design. Whereas existing works only consider one of these two challenges at a time, in this article, a compiler optimization---joint WCET and update-conscious compilation, or WUCC---is proposed to jointly consider WCET and code update for CPSs. The novelty of the proposed approach is that the WCET problem and code update problem are considered concurrently such that a balanced solution with minimal WCET and minimal code difference can be achieved. 
The experimental results show that the proposed technique can minimize WCET and code difference effectively.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "6", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Bertozzi:2015:PRA, author = "Davide Bertozzi and Stefano {Di Carlo} and Salvatore Galfano and Marco Indaco and Piero Olivo and Paolo Prinetto and Cristian Zambelli", title = "Performance and Reliability Analysis of Cross-Layer Optimizations of {NAND} Flash Controllers", journal = j-TECS, volume = "14", number = "1", pages = "7:1--7:??", month = jan, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2629562", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 22 06:25:23 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "NAND flash memories are becoming the predominant technology in the implementation of mass storage systems for both embedded and high-performance applications. However, when considering data and code storage in Non-Volatile Memories (NVMs), such as NAND flash memories, reliability and performance become a serious concern for systems designers. Designing NAND flash-based systems based on worst-case scenarios leads to waste of resources in terms of performance, power consumption, and storage capacity. This is clearly in contrast with the request for runtime reconfigurability, adaptivity, and resource optimization in modern computing systems. There is a clear trend toward supporting differentiated access modes in flash memory controllers, each one setting a differentiated tradeoff point in the performance-reliability optimization space. 
This is supported by the possibility of tuning the NAND flash memory performance, reliability, and power consumption through several tuning knobs such as the flash programming algorithm and the flash error correcting code. However, to successfully exploit these degrees of freedom, it is mandatory to clearly understand the effect that the combined tuning of these parameters has on the full NVM subsystem. This article performs a comprehensive quantitative analysis of the benefits provided by the runtime reconfigurability of an MLC NAND flash controller through the combined effect of an adaptable memory programming circuitry coupled with runtime adaptation of the ECC correction capability. The full NVM subsystem is taken into account, starting from a characterization of the low-level circuitry to the effect of the adaptation on a wide set of realistic benchmarks in order to provide readers a clear view of the benefit this combined adaptation may provide at the system level.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "7", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lin:2015:SLP, author = "Ye-Jyun Lin and Chia-Lin Yang and Jiao-We Huang and Tay-Jyi Lin and Chih-Wen Hsueh and Naehyuck Chang", title = "System-Level Performance and Power Optimization for {MPSoC}: a Memory Access-Aware Approach", journal = j-TECS, volume = "14", number = "1", pages = "8:1--8:??", month = jan, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2656339", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 22 06:25:23 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "As the number of IPs in a multimedia Multi-Processor System-on-Chip (MPSoC) continues to increase, concurrent memory accesses from different IPs increasingly stress memory systems, which presents both opportunities and challenges for future MPSoC design. The impact of such requirements on the system-level design for MPSoC is twofold. First, contention among IPs prolongs memory access time, which exacerbates the persisting memory wall problem. Second, longer memory accesses lead to longer IP stall time, which results in unnecessary leakage waste. In this article, we propose two memory access-aware system-level design approaches for performance and leakage optimization. To alleviate the memory wall problem, we propose a Hierarchical Memory Scheduling (HMS) policy that schedules memory requests from the same IP and application consecutively to reduce interference among memory accesses from different IPs with a fairness guarantee. To reduce IP leakage waste due to long memory access, we propose a memory access-aware power-gating policy. A straightforward power-gating approach is to power gate an IP when it needs to fetch data from memory. 
However, due to the response time variation among memory accesses, aggressively power gating an IP whenever a memory request occurs may result in incorrect power-gating decisions. The proposed memory access-aware power-gating policy makes these decisions judiciously, based on the predicted memory latency of an individual IP and its energy breakeven time. The experimental results show that the proposed HMS memory scheduling policy improves system throughput by 42\% compared to First-Come-First-Serve (FCFS) and by 21\% compared to First-Ready First-Come-First-Serve (FR-FCFS) on an MPSoC for mobile phones. For the improvement of fairness, HMS improves fairness by 1.52$ \times $ compared to FCFS and by 1.23$ \times $ compared to FR-FCFS. In the aspect of leakage optimization, our memory access-aware power-gating mechanism improves energy savings by 3.88$ \times $ and reduces the performance penalty by 70\% compared to conventional timeout-based power gating. We further demonstrate that our HMS memory scheduler can regulate memory access orders, thereby reducing memory response time variation. This leads to more accurate power-down decisions for both conventional timeout power gating and the proposed memory access-aware power gating.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "8", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Borgstrom:2015:PCW, author = "Johannes Borgstr{\"o}m and Ram{\=u}nas Gutkovas and Ioana Rodhe and Bj{\"o}rn Victor", title = "The Psi-Calculi Workbench: a Generic Tool for Applied Process Calculi", journal = j-TECS, volume = "14", number = "1", pages = "9:1--9:??", month = jan, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2682570", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 22 06:25:23 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Psi-calculi is a parametric framework for extensions of the pi-calculus with arbitrary data and logic. All instances of the framework inherit machine-checked proofs of the metatheory such as compositionality and bisimulation congruence. We present a generic analysis tool for psi-calculus instances, enabling symbolic execution and (bi)simulation checking for both unicast and broadcast communication. The tool also provides a library for implementing new psi-calculus instances. We provide examples from traditional communication protocols and wireless sensor networks. We also describe the theoretical foundations of the tool, including an improved symbolic operational semantics, with additional support for scoped broadcast communication.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "9", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{VanHulst:2015:MSH, author = "A. C. {Van Hulst} and M. A. Reniers and W. J.
Fokkink", title = "Maximal Synthesis for {Hennessy--Milner} Logic", journal = j-TECS, volume = "14", number = "1", pages = "10:1--10:??", month = jan, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2680540", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 22 06:25:23 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article concerns the maximal synthesis for Hennessy--Milner Logic on Kripke structures with labeled transitions. We formally define, and prove the validity of, a theoretical framework that modifies a Kripke model to the least possible extent in order to satisfy a given HML formula. Applications of this work can be found in the field of controller synthesis and supervisory control for discrete-event systems. Synthesis is realized technically by first projecting the given Kripke model onto a bisimulation-equivalent partial tree representation, thereby unfolding up to the depth of the synthesized formula. Operational rules then define the required adaptations upon this structure in order to achieve validity of the synthesized formula. Synthesis might result in multiple valid adaptations, which are all related to the original model via simulation. Each simulant of the original Kripke model, which satisfies the synthesized formula, is also related to one of the synthesis results via simulation. This indicates maximality, or maximal permissiveness, in the context of supervisory control. In addition to the formal construction of synthesis as presented in this article, we present it in algorithmic form and analyze its computational complexity. Computer-verified proofs for two important theorems in this article have been created using the Coq proof assistant.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "10", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Boucheneb:2015:SST, author = "Hanifa Boucheneb and Kamel Barkaoui", title = "Stubborn Sets for Time {Petri} Nets", journal = j-TECS, volume = "14", number = "1", pages = "11:1--11:??", month = jan, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2680541", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 22 06:25:23 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The main limitation of the verification approaches based on state enumeration is the state explosion problem. The partial order reduction techniques aim at attenuating this problem by reducing the number of transitions to be fired from each state while preserving properties of interest. Among the reduction techniques proposed in the literature, this article considers the stubborn set method of Petri nets and investigates its extension to time Petri nets. It establishes some useful sufficient conditions for stubborn sets, which preserve deadlocks and k-boundedness of places.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "11", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Pan:2015:HFY, author = "Abhisek Pan and Rance Rodrigues and Sandip Kundu", title = "A Hardware Framework for Yield and Reliability Enhancement in Chip Multiprocessors", journal = j-TECS, volume = "14", number = "1", pages = "12:1--12:??", month = jan, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2629688", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 22 06:25:23 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Device reliability and manufacturability have emerged as dominant concerns in end-of-road CMOS devices. An increasing number of hardware failures are attributed to manufacturability or reliability problems. Maintaining an acceptable manufacturing yield for chips containing tens of billions of transistors with wide variations in device parameters has been identified as a great challenge. Additionally, today's nanometer scale devices suffer from accelerated aging effects because of the extreme operating temperature and electric fields they are subjected to. Unless addressed in design, aging-related defects can significantly reduce the lifetime of a product. In this article, we investigate a micro-architectural scheme for improving yield and reliability of homogeneous chip multiprocessors (CMPs). The proposed solution involves a hardware framework that enables us to utilize the redundancies inherent in a multicore system to keep the system operational in the face of partial failures. A micro-architectural modification allows a faulty core in a CMP to use another core's resources to service any instruction that the former cannot execute correctly by itself. This service improves yield and reliability but may cause loss of performance. 
The target platform for quantitative evaluation of performance under degradation is a dual-core and a quad-core chip multiprocessor with one or more cores sustaining partial failure. Simulation studies indicate that when a large, high-latency, and sparingly used unit such as a floating-point unit fails in a core, correct execution may be sustained through outsourcing with at most a 16\% impact on performance for a floating-point intensive application. For applications with moderate floating-point load, the degradation is insignificant. The performance impact may be mitigated even further by judicious selection of the cores to commandeer depending on the current load on each of the candidate cores. The area overhead is also negligible due to resource reuse.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "12", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lazarescu:2015:ITB, author = "Mihai T. Lazarescu and Luciano Lavagno", title = "Interactive Trace-Based Analysis Toolset for Manual Parallelization of {C} Programs", journal = j-TECS, volume = "14", number = "1", pages = "13:1--13:??", month = jan, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2638556", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 22 06:25:23 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Massive amounts of legacy sequential code need to be parallelized to make better use of modern multiprocessor architectures. Nevertheless, writing parallel programs is still a difficult task. Automated parallelization methods can be effective both at the statement and loop levels and, recently, at the task level, but they are still restricted to specific source code constructs or application domains. 
We present in this article an innovative toolset that supports developers when performing manual code analysis and parallelization decisions. It automatically collects and represents the program profile and data dependencies in an interactive graphical format that facilitates the analysis and discovery of manual parallelization opportunities. The toolset can be used for arbitrary sequential C programs and parallelization patterns. Also, its program-scope data dependency tracing at runtime can complement the tools based on static code analysis and can also benefit from it at the same time. We also tested the effectiveness of the toolset in terms of time to reach parallelization decisions and of their quality. We measured a significant improvement for several real-world representative applications.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "13", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Quan:2015:HTM, author = "Wei Quan and Andy D. Pimentel", title = "A Hybrid Task Mapping Algorithm for Heterogeneous {MPSoCs}", journal = j-TECS, volume = "14", number = "1", pages = "14:1--14:??", month = jan, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2680542", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 22 06:25:23 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The application workloads in modern MPSoC-based embedded systems are becoming increasingly dynamic. Different applications concurrently execute and contend for resources in such systems, which could cause serious changes in the intensity and nature of the workload demands over time. 
To cope with the dynamism of application workloads at runtime and improve the efficiency of the underlying system architecture, this article presents a hybrid task mapping algorithm that combines a static mapping exploration and a dynamic mapping optimization to achieve an overall improvement of system efficiency. We evaluate our algorithm using a heterogeneous MPSoC system with three real applications. Experimental results reveal the effectiveness of our proposed algorithm by comparing derived solutions to the ones obtained from several other runtime mapping algorithms. In test cases with three simultaneously active applications, the mapping solutions derived by our approach have average performance improvements ranging from 45.9\% to 105.9\% and average energy savings ranging from 14.6\% to 23.5\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "14", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Petrucci:2015:EET, author = "Vinicius Petrucci and Orlando Loques and Daniel Moss{\'e} and Rami Melhem and Neven Abou Gazala and Sameh Gobriel", title = "Energy-Efficient Thread Assignment Optimization for Heterogeneous Multicore Systems", journal = j-TECS, volume = "14", number = "1", pages = "15:1--15:??", month = jan, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2566618", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 22 06:25:23 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The current trend to move from homogeneous to heterogeneous multicore systems provides compelling opportunities for achieving performance and energy efficiency goals. Running multiple threads in multicore systems poses challenges on meeting limited shared resources, such as memory bandwidth. 
We propose an optimization approach that includes an Integer Linear Programming (ILP) optimization model and a scheme to dynamically determine thread-to-core assignment. We present simulation analysis that shows energy savings and performance gains for a variety of workloads compared to state-of-the-art schemes. We implemented and evaluated a prototype of our thread assignment approach at user level, leveraging Linux scheduling and performance-monitoring capabilities.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "15", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Yang:2015:ESV, author = "Zhengfeng Yang and Wang Lin and Min Wu", title = "Exact Safety Verification of Hybrid Systems Based on Bilinear {SOS} Representation", journal = j-TECS, volume = "14", number = "1", pages = "16:1--16:??", month = jan, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2629424", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 22 06:25:23 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this article, we address the problem of safety verification of nonlinear hybrid systems. A hybrid symbolic-numeric method is presented to compute exact inequality invariants of hybrid systems efficiently. Some numerical invariants of a hybrid system can be obtained by solving a bilinear SOS programming via the PENBMI solver or iterative method, then the modified Newton refinement and rational vector recovery techniques are applied to obtain exact polynomial invariants with rational coefficients, which exactly satisfy the conditions of invariants. Experiments on some benchmarks are given to illustrate the efficiency of our algorithm.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "16", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Rodrigues:2015:DSE, author = "Rance Rodrigues and Israel Koren and Sandip Kundu", title = "Does the Sharing of Execution Units Improve Performance\slash Power of Multicores?", journal = j-TECS, volume = "14", number = "1", pages = "17:1--17:??", month = jan, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2680543", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 22 06:25:23 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Several studies and recent real-world designs have promoted sharing of underutilized resources between cores in a multicore processor to achieve better performance/power. It has been argued that when utilization of such resources is low, sharing has a negligible impact on performance while offering considerable area and power benefits. In this article, we investigate the performance and performance/watt implications of sharing large and underutilized resources between pairs of cores in a multicore. We first study sharing of the entire floating-point datapath (including reservation stations and execution units) by two cores, similar to AMD's Bulldozer. We find that while this architecture results in power savings for certain workload combinations, it also results in significant performance loss of up to 28\%. Next, we study an alternative sharing architecture where only the floating-point execution units are shared, while the individual cores retain their reservation stations. This reduces the highest performance loss to 14\%. We then extend the study to include sharing of other large execution units that are used infrequently, namely, the integer multiply and divide units. 
Subsequently, we analyze the impact of sharing hardware resources in Simultaneously Multithreaded (SMT) processors where multiple threads run concurrently on the same core. We observe that sharing improves performance/watt at a negligible performance cost only if the shared units have high throughput. Sharing low-throughput units reduces both performance and performance/watt. To increase the throughput of the shared units, we propose the use of Dynamic Voltage and Frequency Boosting (DVFB) of only the shared units that can be placed on a separate voltage island. Our results indicate that the use of DVFB improves both performance and performance/watt by as much as 22\% and 10\%, respectively.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "17", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Diamantopoulos:2015:GPA, author = "Dionysios Diamantopoulos and Kostas Siozios and Sotirios Xydis and Dimitrios Soudris", title = "{GENESIS}: Parallel Application Placement onto Reconfigurable Architectures (Invited for the Special Issue on Runtime Management)", journal = j-TECS, volume = "14", number = "1", pages = "18:1--18:??", month = jan, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2629651", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 22 06:25:23 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Placement is though as the most time-consuming processes in physical implementation flows for reconfigurable architectures, while it highly affects the quality of derived application implementation, as it has impact on the maximum operating frequency. Throughout this article, we propose a novel placer, based on genetic algorithm, targeting to FPGAs. 
Rather than relevant approaches, which are executed sequentially, the new placer exhibits inherent parallelism, which can benefit from multicore processors. Experimental results prove the effectiveness of this solution, as it achieves average reduction of execution runtime and application's delay by 67$ \times $ and 16\%, respectively.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "18", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Pager:2015:SSM, author = "Jared Pager and Reiley Jeyapaul and Aviral Shrivastava", title = "A Software Scheme for Multithreading on {CGRAs}", journal = j-TECS, volume = "14", number = "1", pages = "19:1--19:??", month = jan, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2638558", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 22 06:25:23 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Recent industry trends show a drastic rise in the use of hand-held embedded devices, from everyday applications to medical (e.g., monitoring devices) and critical defense applications (e.g., sensor nodes). The two key requirements in the design of such devices are their processing capabilities and battery life. There is therefore an urgency to build high-performance and power-efficient embedded devices, inspiring researchers to develop novel system designs for the same. The use of a coprocessor (application-specific hardware) to offload power-hungry computations is gaining favor among system designers to suit their power budgets. We propose the use of CGRAs (Coarse-Grained Reconfigurable Arrays) as a power-efficient coprocessor. Though CGRAs have been widely used for streaming applications, the extensive compiler support required limits its applicability and use as a general purpose coprocessor. 
In addition, a CGRA structure can efficiently execute only one statically scheduled kernel at a time, which is a serious limitation when used as an accelerator to a multithreaded or multitasking processor. In this work, we envision a multithreaded CGRA where multiple schedules (or kernels) can be executed simultaneously on the CGRA (as a coprocessor). We propose a comprehensive software scheme that transforms the traditionally single-threaded CGRA into a multithreaded coprocessor to be used as a power-efficient accelerator for multithreaded embedded processors. Our software scheme includes (1) a compiler framework that integrates with existing CGRA mapping techniques to prepare kernels for execution on the multithreaded CGRA and (2) a runtime mechanism that dynamically schedules multiple kernels (offloaded from the processor) to execute simultaneously on the CGRA coprocessor. Our multithreaded CGRA coprocessor implementation thus makes it possible to achieve improved power-efficient computing in modern multithreaded embedded systems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "19", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Shukla:2015:EOS, author = "Sandeep K. Shukla", title = "Editorial: Oh Security --- Where Art Thou?", journal = j-TECS, volume = "14", number = "2", pages = "20:1--20:??", month = mar, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2742044", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Mar 26 05:58:56 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "20", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Rafiliu:2015:SOR, author = "Sergiu Rafiliu and Petru Eles and Zebo Peng and Michael Lemmon", title = "Stability of Online Resource Managers for Distributed Systems under Execution Time Variations", journal = j-TECS, volume = "14", number = "2", pages = "21:1--21:??", month = mar, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2629495", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Mar 26 05:58:56 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Today's embedded systems are exposed to variations in resource usage due to complex software applications, hardware platforms, and impact of the runtime environments. When these variations are large and efficiency is required, on-line resource managers may be deployed on the system to help it control its resource usage. An often neglected problem is whether these resource managers are stable, meaning that the resource usage is controlled under all possible scenarios. In distributed systems, this problem is particularly hard because applications distributed over many resources generate complex dependencies between their resources. In this article, we develop a mathematical model of the system, and derive conditions that, if satisfied, guarantee stability.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "21", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Loke:2015:MCS, author = "Seng W. 
Loke and Keegan Napier and Abdulaziz Alali and Niroshinie Fernando and Wenny Rahayu", title = "Mobile Computations with Surrounding Devices: Proximity Sensing and {MultiLayered} Work Stealing", journal = j-TECS, volume = "14", number = "2", pages = "22:1--22:??", month = mar, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2656214", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Mar 26 05:58:56 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "With the proliferation of mobile devices, and their increasingly powerful embedded processors and storage, vast resources increasingly surround users. We have been investigating the concept of on-demand ad hoc forming of groups of nearby mobile devices in the midst of crowds to cooperatively perform computationally intensive tasks as a service to local mobile users, or what we call mobile crowd computing. As devices can vary in processing power and some can leave a group unexpectedly or new devices join in, there is a need for algorithms that can distribute work in a flexible manner and still work with different arrangements of devices that can arise in an ad hoc fashion. In this article, we first argue for the feasibility of such use of crowd-embedded computations using theoretical justifications and reporting on our experiments on Bluetooth-based proximity sensing. We then present a multilayered work-stealing style algorithm for distributing work efficiently among mobile devices and compare speedups attainable for different topologies of devices networked with Bluetooth, justifying a topology-flexible opportunistic approach. While our experiments are with Bluetooth and mobile devices, the approach is applicable to ecosystems of various embedded devices with powerful processors, networking technologies, and storage that will increasingly surround users.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. 
Comput. Syst.", articleno = "22", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Malik:2015:HRT, author = "Avinash Malik and David Gregg", title = "Heuristics on Reachability Trees for Bicriteria Scheduling of Stream Graphs on Heterogeneous Multiprocessor Architectures", journal = j-TECS, volume = "14", number = "2", pages = "23:1--23:??", month = mar, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2638553", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Mar 26 05:58:56 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this article, we partition and schedule Synchronous Dataflow (SDF) graphs onto heterogeneous execution architectures in such a way as to minimize energy consumption and maximize throughput. Partitioning and scheduling SDF graphs onto homogeneous architectures is a well-known NP-hard problem. The heterogeneity of the execution architecture makes our problem exponentially challenging to solve. We model the problem as a weighted sum and solve it using novel state space exploration inspired from the theory of parallel automata. The resultant heuristic algorithm results in good scheduling when implemented in an existing stream framework.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "23", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Martin:2015:ROS, author = "Paul Martin and Lucas Wanner and Mani Srivastava", title = "Runtime Optimization of System Utility with Variable Hardware", journal = j-TECS, volume = "14", number = "2", pages = "24:1--24:??", month = mar, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2656338", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Mar 26 05:58:56 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Increasing hardware variability in newer integrated circuit fabrication technologies has caused corresponding power variations on a large scale. These variations are particularly exaggerated for idle power consumption, motivating the need to mitigate the effects of variability in systems whose operation is dominated by long idle states with periodic active states. In systems where computation is severely limited by anemic energy reserves and where a long overall system lifetime is desired, maximizing the quality of a given application subject to these constraints is both challenging and an important step toward achieving high-quality deployments. This work describes VaRTOS, an architecture and corresponding set of operating system abstractions that provide explicit treatment of both idle and active power variations for tasks running in real-time operating systems. Tasks in VaRTOS express elasticity by exposing individual knobs---shared variables that the operating system can tune to adjust task quality and, correspondingly, task power, maximizing application utility both on a per-task and on a system-wide basis. 
We provide results regarding online learning of instance-specific sleep power, active power, and task-level power expenditure on simulated hardware with demonstrated effects for several prototypical applications. Our results on networked sensing applications, which are representative of a broader category of applications that VaRTOS targets, show that VaRTOS can reduce variability-induced energy expenditure errors from over 70\% in many cases to under 2\% in most cases and under 5\% in the worst case.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "24", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Gomony:2015:RTM, author = "Manil Dev Gomony and Benny Akesson and Kees Goossens", title = "A Real-Time Multichannel Memory Controller and Optimal Mapping of Memory Clients to Memory Channels", journal = j-TECS, volume = "14", number = "2", pages = "25:1--25:??", month = mar, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2661635", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Mar 26 05:58:56 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Ever-increasing demands for main memory bandwidth and memory speed/power tradeoff led to the introduction of memories with multiple memory channels, such as Wide IO DRAM. Efficient utilization of a multichannel memory as a shared resource in multiprocessor real-time systems depends on mapping of the memory clients to the memory channels according to their requirements on latency, bandwidth, communication, and memory capacity. However, there is currently no real-time memory controller for multichannel memories, and there is no methodology to optimally configure multichannel memories in real-time systems. 
As a first work toward this direction, we present two main contributions in this article: (1) a configurable real-time multichannel memory controller architecture with a novel method for logical-to-physical address translation and (2) two design-time methods to map memory clients to the memory channels, one an optimal algorithm based on an integer programming formulation of the mapping problem, and the other a fast heuristic algorithm. We demonstrate the real-time guarantees on bandwidth and latency provided by our multichannel memory controller architecture by experimental evaluation. Furthermore, we compare the performance of the mapping problem formulation in a solver and the heuristic algorithm against two existing mapping algorithms in terms of computation time and mapping success ratio. We show that an optimal solution can be found in 2 hours using the solver and in less than 1 second with less than 7\% mapping failure using the heuristic for realistically sized problems. Finally, we demonstrate configuring a Wide IO DRAM in a high-definition (HD) video and graphics processing system to emphasize the practical applicability and effectiveness of this work.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "25", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Jezequel:2015:FPA, author = "Lo{\"\i}g Jezequel and Eric Fabre and Victor Khomenko", title = "Factored Planning: From Automata to {Petri} Nets", journal = j-TECS, volume = "14", number = "2", pages = "26:1--26:??", month = mar, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2656215", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Mar 26 05:58:56 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Factored planning mitigates the state explosion problem by avoiding the construction of the state space of the whole system and instead working with the system's components. Traditionally, finite automata have been used to represent the components, with the overall system being represented as their product. In this article, we change the representation of components to safe Petri nets. This allows one to use cheap structural operations like transition contractions to reduce the size of the Petri net before its state space is generated, which often leads to substantial savings compared with automata. The proposed approach has been implemented and proved efficient on several factored planning benchmarks. This article is an extended version of our ACSD 2013 paper [Jezequel et al. 2013], with the addition of the proofs and the experimental results of Sections 6 and 7.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "26", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Taniuchi:2015:AUI, author = "Daisuke Taniuchi and Takuya Maekawa", title = "Automatic Update of Indoor Location Fingerprints with Pedestrian Dead Reckoning", journal = j-TECS, volume = "14", number = "2", pages = "27:1--27:??", month = mar, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2667226", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Mar 26 05:58:56 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this article, we propose a new method for automatically updating a Wi-Fi indoor positioning model on a cloud server by employing uploaded sensor data obtained from the smartphone sensors of a specific user who spends a lot of time in a given environment (e.g., a worker in the environment). In this work, we attempt to track the user with pedestrian dead reckoning techniques, and at the same time we obtain Wi-Fi scan data from a mobile device possessed by the user. With the scan data and the estimated coordinates uploaded to a cloud server, we can automatically create a pair consisting of a scan and its corresponding indoor coordinates during the user's daily life and update an indoor positioning model on the server by using the information. With this approach, we try to cope with the instability of Wi-Fi-based positioning methods caused by changing environmental dynamics, that is, layout changes and moving or removal of Wi-Fi access points. Therefore, ordinary users (e.g., customers) who do not have rich sensors can benefit from the continually updating positioning model.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "27", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Jimenez:2015:LSC, author = "Xavier Jimenez and David Novo and Paolo Ienne", title = "{Libra}: Software-Controlled Cell Bit-Density to Balance Wear in {NAND} Flash", journal = j-TECS, volume = "14", number = "2", pages = "28:1--28:??", month = mar, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2638552", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Mar 26 05:58:56 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Hybrid flash storages combine a small Single-Level Cell (SLC) partition with a large Multilevel Cell (MLC) partition. Compared to MLC-only solutions, the SLC partition exploits fast and short local write updates, while the MLC part brings large capacity. On the whole, hybrid storage achieves a tangible performance improvement for a moderate extra cost. Yet, device lifetime is an important aspect often overlooked: in a hybrid system, a large ratio of writes may be directed to the small SLC partition, thus generating a local stress that could exhaust the SLC lifetime significantly sooner than the MLC partition's. To address this issue, we propose Libra, which builds on flash storage made solely of MLC flash and uses the memory devices in SLC mode when appropriate; that is, we exploit the fact that writing a single bit per cell in an MLC provides characteristics close to those of an ordinary SLC. In our scheme, the cell bit-density of a block can be decided dynamically by the flash controller, and the physical location of the SLC partition can now be moved around the whole device, balancing wear across it. 
This article provides a thorough analysis and characterization of the SLC mode for MLCs and gives evidence that the inherent flexibility provided by Libra simplifies considerably the stress balance on the device. Overall, our technique improves lifetime by up to one order of magnitude at no cost when compared to any hybrid storage that relies on a static SLC-MLC partitioning.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "28", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chang:2015:PVL, author = "Li-Pin Chang and Yo-Chuan Su and I-Chen Wu", title = "Plugging Versus Logging: Adaptive Buffer Management for Hybrid-Mapping {SSDs}", journal = j-TECS, volume = "14", number = "2", pages = "29:1--29:??", month = mar, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2629455", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Mar 26 05:58:56 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "A promising technique to improve the write performance of solid-state disks (SSDs) is to use a disk write buffer. The goals of a write buffer is not only to reduce the write traffic to the flash chips but also to convert host write patterns into long and sequential write bursts. This study proposes a new buffer design consisting of a replacement policy and a write-back policy. The buffer monitors how the host workload stresses the flash translation layer upon garbage collection. This is used to dynamically adjust its replacement and write-back strategies for a good balance between write sequentiality and write randomness. When the garbage collection overhead is low, the write buffer favors high write sequentiality over low write randomness. 
When the flash translation layer observes a high overhead of garbage collection, the write buffer favors low write randomness over high write sequentiality. The proposed buffer design outperformed existing approaches by up to 20\% under various workloads and flash translation algorithms, as will be shown in experiment results.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "29", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Jia:2015:TAD, author = "Zhiping Jia and Yang Li and Yi Wang and Meng Wang and Zili Shao", title = "Temperature-Aware Data Allocation for Embedded Systems with Cache and Scratchpad Memory", journal = j-TECS, volume = "14", number = "2", pages = "30:1--30:??", month = mar, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2629650", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Mar 26 05:58:56 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The hybrid memory architecture that contains both on-chip cache and scratchpad memory (SPM) has been widely used in embedded systems. In this article, we explore this hybrid memory architecture by jointly optimizing time performance and temperature for embedded systems with loops. Our basic idea is to adaptively adjust the workload distribution between cache and SPM based on the current temperature. For a problem in which the workload can be estimated a priori, we present a nonlinear programming formulation to optimally minimize the total execution time of a loop under the constraints of SPM size and temperature. To solve a problem in which the workload is not known a priori, we propose a temperature-aware adaptive loop scheduling algorithm called TALS to dynamically allocate data to cache and SPM at runtime. 
The experimental results show that our algorithms can effectively achieve both performance and temperature optimization for embedded systems with cache and SPM.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "30", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhang:2015:MPA, author = "Weihua Zhang and Jiaxin Li and Yi Li and Haibo Chen", title = "Multilevel Phase Analysis", journal = j-TECS, volume = "14", number = "2", pages = "31:1--31:??", month = mar, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2629594", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Mar 26 05:58:56 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Phase analysis, which classifies the set of execution intervals with similar execution behavior and resource requirements, has been widely used in a variety of systems, including dynamic cache reconfiguration, prefetching, race detection, and sampling simulation. Although phase granularity has been a major factor in the accuracy of phase analysis, it has not been well investigated, and most systems usually adopt a fine-grained scheme. However, such a scheme can only take account of recent local phase information and could be frequently interfered by temporary noise due to instant phase changes, which might notably limit the accuracy. In this article, we make the first investigation on the potential of multilevel phase analysis (MLPA), where different granularity phase analyses are combined together to improve the overall accuracy. The key observation is that the coarse-grained intervals belonging to the same phase usually consist of stably distributed fine-grained phases. Moreover, the phase of a coarse-grained interval can be accurately identified based on the fine-grained intervals at the beginning of its execution. 
Based on the observation, we design and implement an MLPA scheme. In such a scheme, a coarse-grained phase is first identified based on the fine-grained intervals at the beginning of its execution. The following fine-grained phases in it are then predicted based on the sequence of fine-grained phases in the coarse-grained phase. Experimental results show that such a scheme can notably improve the prediction accuracy. Using a Markov fine-grained phase predictor as the baseline, MLPA can improve prediction accuracy by 20\%, 39\%, and 29\% for next phase, phase change, and phase length prediction for SPEC2000, respectively, yet incur only about 2\% time overhead and 40\% space overhead (about 360 bytes in total). To demonstrate the effectiveness of MLPA, we apply it to a dynamic cache reconfiguration system that dynamically adjusts the cache size to reduce the power consumption and access time of the data cache. Experimental results show that MLPA can further reduce the average cache size by 15\% compared to the fine-grained scheme. Moreover, for MLPA, we also observe that coarse-grained phases can better capture the overall program characteristics with fewer of phases and the last representative phase could be classified in a very early program position, leading to fewer execution internals being functionally simulated. Based on this observation, we also design a multilevel sampling simulation technique that combines both fine- and coarse-grained phase analysis for sampling simulation. Such a scheme uses fine-grained simulation points to represent only the selected coarse-grained simulation points instead of the entire program execution; thus, it could further reduce both the functional and detailed simulation time. Experimental results show that MLPA for sampling simulation can achieve a speedup in simulation time of about 8.3X with similar accuracy compared to 10M SimPoint.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "31", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Banaiyanmofrad:2015:UFF, author = "Abbas Banaiyanmofrad and Houman Homayoun and Nikil Dutt", title = "Using a Flexible Fault-Tolerant Cache to Improve Reliability for Ultra Low Voltage Operation", journal = j-TECS, volume = "14", number = "2", pages = "32:1--32:??", month = mar, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2629566", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Mar 26 05:58:56 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Caches are known to consume a large part of total microprocessor power. Traditionally, voltage scaling has been used to reduce both dynamic and leakage power in caches. However, aggressive voltage reduction causes process-variation--induced failures in cache SRAM arrays, which compromise cache reliability. In this article, we propose FFT-Cache, a flexible fault-tolerant cache that uses a flexible defect map to configure its architecture to achieve significant reduction in energy consumption through aggressive voltage scaling while maintaining high error reliability. FFT-Cache uses a portion of faulty cache blocks as redundancy---using block-level or line-level replication within or between sets---to tolerate other faulty caches lines and blocks. Our configuration algorithm categorizes the cache lines based on degree of conflict between their blocks to reduce the granularity of redundancy replacement. FFT-Cache thereby sacrifices a minimal number of cache lines to avoid impacting performance while tolerating the maximum amount of defects. 
Our experimental results on a processor executing SPEC2K benchmarks demonstrate that the operational voltage of both L1/L2 caches can be reduced down to 375 mV, which achieves up to 80\% reduction in the dynamic power and up to 48\% reduction in the leakage power. This comes with only a small performance loss ({$<$}5\%) and 13\% area overhead.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "32", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Owaida:2015:EDS, author = "Muhsen Owaida and Gabriel Falcao and Joao Andrade and Christos Antonopoulos and Nikolaos Bellas and Madhura Purnaprajna and David Novo and Georgios Karakonstantis and Andreas Burg and Paolo Ienne", title = "Enhancing Design Space Exploration by Extending {CPU\slash GPU} Specifications onto {FPGAs}", journal = j-TECS, volume = "14", number = "2", pages = "33:1--33:??", month = mar, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2656207", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Mar 26 05:58:56 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The design cycle for complex special-purpose computing systems is extremely costly and time-consuming. It involves a multiparametric design space exploration for optimization, followed by design verification. Designers of special purpose VLSI implementations often need to explore parameters, such as optimal bitwidth and data representation, through time-consuming Monte Carlo simulations. A prominent example of this simulation-based exploration process is the design of decoders for error correcting systems, such as the Low-Density Parity-Check (LDPC) codes adopted by modern communication standards, which involves thousands of Monte Carlo runs for each design point. 
Currently, high-performance computing offers a wide set of acceleration options that range from multicore CPUs to Graphics Processing Units (GPUs) and Field Programmable Gate Arrays (FPGAs). The exploitation of diverse target architectures is typically associated with developing multiple code versions, often using distinct programming paradigms. In this context, we evaluate the concept of retargeting a single OpenCL program to multiple platforms, thereby significantly reducing design time. A single OpenCL-based parallel kernel is used without modifications or code tuning on multicore CPUs, GPUs, and FPGAs. We use SOpenCL (Silicon to OpenCL), a tool that automatically converts OpenCL kernels to RTL in order to introduce FPGAs as a potential platform to efficiently execute simulations coded in OpenCL. We use LDPC decoding simulations as a case study. Experimental results were obtained by testing a variety of regular and irregular LDPC codes that range from short/medium (e.g., 8,000 bit) to long length (e.g., 64,800 bit) DVB-S2 codes. We observe that, depending on the design parameters to be simulated, on the dimension and phase of the design, the GPU or FPGA may suit different purposes more conveniently, thus providing different acceleration factors over conventional multicore CPUs.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "33", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wang:2015:TWA, author = "Tianzheng Wang and Duo Liu and Yi Wang and Zili Shao", title = "Towards Write-Activity-Aware Page Table Management for Non-volatile Main Memories", journal = j-TECS, volume = "14", number = "2", pages = "34:1--34:??", month = mar, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2697394", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Mar 26 05:58:56 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Non-volatile memories such as phase change memory (PCM) and memristor are being actively studied as an alternative to DRAM-based main memory in embedded systems because of their properties, which include low power consumption and high density. Though PCM is one of the most promising candidates with commercial products available, its adoption has been greatly compromised by limited write endurance. As main memory is one of the most heavily accessed components, it is critical to prolong the lifetime of PCM. In this article, we present {Write-Activity-aware Page Table Management} (WAPTM), a simple yet effective page table management scheme for reducing unnecessary writes, by redesigning system software and exploiting write-activity-aware features provided by the hardware. We implemented WAPTM in Google Android based on the ARM architecture and evaluated it with real Android applications. Experimental results show that WAPTM can significantly reduce writes in page tables, proving the feasibility and potential of prolonging the lifetime of PCM-based main memory through reducing writes at the OS level.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "34", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Tsai:2015:JPI, author = "Chun-Jen Tsai and Han-Wen Kuo and Zigang Lin and Zi-Jing Guo and Jun-Fu Wang", title = "A {Java} Processor {IP} Design for Embedded {SoC}", journal = j-TECS, volume = "14", number = "2", pages = "35:1--35:??", month = mar, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2629649", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Mar 26 05:58:56 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/java2000.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this article, we present a reusable Java processor IP for application processors of embedded systems. For the Java microarchitecture, we propose a low-cost stack memory design that supports a two-fold instruction folding pipeline and a low-complexity Java exception handling hardware. We also propose a mapping between the Java dynamic class loading model and the SoC platform-based design principle so that the Java core can be encapsulated as a reusable IP. To achieve this goal, a two-level method area with two on-chip circular buffers is proposed as an interface between the RISC core and the Java core. The proposed architecture is implemented on a Xilinx Virtex-5 FPGA device. Experimental results show that its performance has some advantages over other Java processors and a Java VM with JIT acceleration on a PowerPC platform.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "35", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ttofis:2015:HEA, author = "Christos Ttofis and Christos Kyrkou and Theocharis Theocharides", title = "A Hardware-Efficient Architecture for Accurate Real-Time Disparity Map Estimation", journal = j-TECS, volume = "14", number = "2", pages = "36:1--36:??", month = mar, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2629699", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Mar 26 05:58:56 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Emerging embedded vision systems utilize disparity estimation as a means to perceive depth information to intelligently interact with their host environment and take appropriate actions. Such systems demand high processing performance and accurate depth perception while requiring low energy consumption, especially when dealing with mobile and embedded applications, such as robotics, navigation, and security. The majority of real-time dedicated hardware implementations of disparity estimation systems have adopted local algorithms relying on simple cost aggregation strategies with fixed and rectangular correlation windows. However, such algorithms generally suffer from significant ambiguity along depth borders and areas with low texture. To this end, this article presents the hardware architecture of a disparity estimation system that enables good performance in both accuracy and speed. The architecture implements an adaptive support weight stereo correspondence algorithm that integrates image segmentation information in an attempt to increase the robustness of the matching process. The article also presents hardware-oriented algorithmic modifications/optimization techniques that make the algorithm hardware-friendly and suitable for efficient dedicated hardware implementation. 
A comparison to the literature asserts that an FPGA implementation of the proposed architecture is among the fastest implementations in terms of million disparity estimations per second (MDE/s), and with an overall accuracy of 90.21\%, it presents an effective processing speed/disparity map accuracy trade-off.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "36", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Peon-quiros:2015:PLD, author = "Miguel Pe{\'o}n-Quir{\'o}s and Alexandros Bartzas and Stylianos Mamagkakis and Francky Catthoor and Jos{\'e} Manuel Mend{\'\i}as and Dimitrios Soudris", title = "Placement of Linked Dynamic Data Structures over Heterogeneous Memories in Embedded Systems", journal = j-TECS, volume = "14", number = "2", pages = "37:1--37:??", month = mar, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2656208", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Mar 26 05:58:56 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Software applications use dynamic memory (allocated and deallocated in the system's heap) to handle dynamism in their working conditions. Embedded systems tend to include complex memory organizations but most techniques for dynamic memory management do not deal with the placement of data objects in physical memory modules. Additionally, the performance of hardware-controlled cache memories may be severely hindered when used with linked data structures. We therefore present a methodology to map dynamic data on the multilevel memory subsystem of embedded systems, taking advantage of any available memories (e.g., on-chip SRAMs) and avoiding interference with the cache memories. The resulting data placement uses an exclusive memory model and is compatible with existing techniques for managing static data. 
Our methodology helps the designer achieve reductions in energy consumption and execution time that can be obtained by an expert in an automated way while keeping control over the process through multiple configuration knobs.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "37", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Segarra:2015:ASP, author = "Juan Segarra and Clemente Rodr{\'\i}guez and Rub{\'e}n Gran and Luis C. Aparicio and V{\'\i}ctor Vi{\~n}als", title = "{ACDC}: Small, Predictable and High-Performance Data Cache", journal = j-TECS, volume = "14", number = "2", pages = "38:1--38:??", month = mar, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2677093", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Mar 26 05:58:56 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In multitasking real-time systems, the worst-case execution time (WCET) of each task and also the effects of interferences between tasks in the worst-case scenario need to be calculated. This is especially complex in the presence of data caches. In this article, we propose a small instruction-driven data cache (256 bytes) that effectively exploits locality. It works by preselecting a subset of memory instructions that will have data cache replacement permission. Selection of such instructions is based on data reuse theory. Since each selected memory instruction replaces its own data cache line, it prevents pollution and performance in tasks becomes independent of the size of the associated data structures. We have modeled several memory configurations using the Lock-MS WCET analysis method. Our results show that, on average, our data cache effectively services 88\% of program data of the tested benchmarks. 
Such results double the worst-case performance of our tested multitasking experiments. In addition, in the worst case, they reach between 75\% and 89\% of the ideal case of always hitting in instruction and data caches. As well, we show that using partitioning on our proposed hardware only provides marginal benefits in worst-case performance, so using partitioning is discouraged. Finally, we study the viability of our proposal in the MiBench application suite by characterizing its data reuse, achieving hit ratios beyond 90\% in most programs.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "38", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Bellasi:2015:ERR, author = "Patrick Bellasi and Giuseppe Massari and William Fornaciari", title = "Effective Runtime Resource Management Using {Linux} Control Groups with the {BarbequeRTRM} Framework", journal = j-TECS, volume = "14", number = "2", pages = "39:1--39:??", month = mar, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2658990", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Mar 26 05:58:56 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/linux.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib; https://www.math.utah.edu/pub/tex/bib/unix.bib", abstract = "The extremely high technology process reached by silicon manufacturing (smaller than 32nm) has led to production of computational platforms and SoC, featuring a considerable amount of resources. Whereas from one side such multi- and many-core platforms show growing performance capabilities, from the other side they are more and more affected by power, thermal, and reliability issues. Moreover, the increased computational capabilities allow congested usage scenarios with workloads subject to mixed and time-varying requirements. 
Effective usage of the resources should take into account both the application requirements and resources availability, with an arbiter, namely a resource manager in charge to solve the resource contention among demanding applications. Current operating systems (OS) have only a limited knowledge about application-specific behaviors and their time-varying requirements. Dedicated system interfaces to collect such inputs and forward them to the OS (e.g., its scheduler) are thus an interesting research area that aims at integrating the OS with an ad hoc resource manager. Such a component can exploit efficient low-level OS interfaces and mechanisms to extend its capabilities of controlling tasks and system resources. Because of the specific tasks and timings of a resource manager, this component can be easily and effectively developed as a user-space extension lying in between the OS and the controlled application. This article, which focuses on multicore Linux systems, shows a portable solution to enforce runtime resource management decisions based on the standard control groups framework. A burst and a mixed workload analysis, performed on a multicore-based NUMA platform, have reported some promising results both in terms of performance and power saving.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "39", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Schaumont:2015:IEP, author = "Patrick Schaumont and M{\'a}ire O'Neill and Tim G{\"u}neysu", title = "Introduction for Embedded Platforms for Cryptography in the Coming Decade", journal = j-TECS, volume = "14", number = "3", pages = "40:1--40:??", month = apr, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2745710", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Apr 21 17:21:32 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "40", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Shukla:2015:ESD, author = "Sandeep K. Shukla", title = "Editorial: Schizoid Design for Critical Embedded Systems", journal = j-TECS, volume = "14", number = "3", pages = "40e:1--40e:??", month = may, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2761728", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Dec 9 08:08:56 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "40e", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Howe:2015:PLB, author = "James Howe and Thomas P{\"o}ppelmann and M{\'a}ire O'Neill and Elizabeth O'Sullivan and Tim G{\"u}neysu", title = "Practical Lattice-Based Digital Signature Schemes", journal = j-TECS, volume = "14", number = "3", pages = "41:1--41:??", month = apr, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2724713", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Apr 21 17:21:32 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Digital signatures are an important primitive for building secure systems and are used in most real-world security protocols. However, almost all popular signature schemes are either based on the factoring assumption (RSA) or the hardness of the discrete logarithm problem (DSA/ECDSA). In the case of classical cryptanalytic advances or progress on the development of quantum computers, the hardness of these closely related problems might be seriously weakened. A potential alternative approach is the construction of signature schemes based on the hardness of certain lattice problems that are assumed to be intractable by quantum computers. Due to significant research advancements in recent years, lattice-based schemes have now become practical and appear to be a very viable alternative to number-theoretic cryptography. In this article, we focus on recent developments and the current state of the art in lattice-based digital signatures and provide a comprehensive survey discussing signature schemes with respect to practicality. Additionally, we discuss future research areas that are essential for the continued development of lattice-based cryptography.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "41", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Boorghany:2015:CIL, author = "Ahmad Boorghany and Siavash Bayat Sarmadi and Rasool Jalili", title = "On Constrained Implementation of Lattice-Based Cryptographic Primitives and Schemes on Smart Cards", journal = j-TECS, volume = "14", number = "3", pages = "42:1--42:??", month = apr, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700078", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Apr 21 17:21:32 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Most lattice-based cryptographic schemes with a security proof suffer from large key sizes and heavy computations. This is also true for the simpler case of authentication protocols that are used on smart cards as a very-constrained computing environment. Recent progress on ideal lattices has significantly improved the efficiency and made it possible to implement practical lattice-based cryptography on constrained devices. However, to the best of our knowledge, no previous attempts have been made to implement lattice-based schemes on smart cards. In this article, we provide the results of our implementation of several state-of-the-art lattice-based authentication protocols on smart cards and a microcontroller widely used in smart cards. Our results show that only a few of the proposed lattice-based authentication protocols can be implemented using limited resources of such constrained devices; however, cutting-edge ones are suitably efficient to be used practically on smart cards. Moreover, we have implemented fast Fourier transform (FFT) and discrete Gaussian sampling with different typical parameter sets, as well as versatile lattice-based public-key encryptions. 
These results have noticeable points that help to design or optimize lattice-based schemes for constrained devices.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "42", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Aysu:2015:FRT, author = "Aydin Aysu and Bilgiday Yuce and Patrick Schaumont", title = "The Future of Real-Time Security: Latency-Optimized Lattice-Based Digital Signatures", journal = j-TECS, volume = "14", number = "3", pages = "43:1--43:??", month = may, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2724714", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Dec 9 08:08:56 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Advances in quantum computing have spurred a significant amount of research into public-key cryptographic algorithms that are resistant against postquantum cryptanalysis. Lattice-based cryptography is one of the important candidates because of its reasonable complexity combined with reasonable signature sizes. However, in a postquantum world, not only the cryptography will change but also the computing platforms. Large amounts of resource-constrained embedded systems will connect to a cloud of powerful server computers. We present an optimization technique for lattice-based signature generation on such embedded systems; our goal is to optimize latency rather than throughput. Indeed, on an embedded system, the latency of a single signature for user identification or message authentication is more important than the aggregate signature generation rate. We build a high-performance implementation using hardware\slash software codesign techniques. The key idea is to partition the signature generation scheme into offline and online phases. 
The signature scheme allows this separation because a large portion of the computation does not depend on the message to be signed and can be handled before the message is given. Then, we can map complex precomputation operations in software on a low-cost processor and utilize hardware resources to accelerate simpler online operations. To find the optimum hardware architecture for the target platform, we define and explore the design space and implement two design configurations. We realize our solutions on the Altera Cyclone-IV CGX150 FPGA. The implementation consists of a NIOS soft-core processor and a low-latency hash and polynomial multiplication engine. On average, the proposed low-latency architecture can generate a signature with a latency of 96 clock cycles at 40MHz, resulting in a response time of 2.4 $ \mu $s for a signing request. On equivalent platforms, this corresponds to a performance improvement of 33 and 105 times compared to previous hardware and software implementations, respectively.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "43", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{VonMaurich:2015:IQM, author = "Ingo {Von Maurich} and Tobias Oder and Tim G{\"u}neysu", title = "Implementing {QC--MDPC} {McEliece} Encryption", journal = j-TECS, volume = "14", number = "3", pages = "44:1--44:??", month = apr, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700102", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Apr 21 17:21:32 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "With respect to performance, asymmetric code-based cryptography based on binary Goppa codes has been reported as a highly interesting alternative to RSA and ECC. 
A major drawback is still the large keys in the range between 50 and 100KB that prevented real-world applications of code-based cryptosystems so far. A recent proposal by Misoczki et al. showed that quasi-cyclic moderate-density parity-check (QC-MDPC) codes can be used in McEliece encryption, reducing the public key to just 0.6KB to achieve an 80-bit security level. In this article, we provide optimized decoding techniques for MDPC codes and survey several efficient implementations of the QC-MDPC McEliece cryptosystem. This includes high-speed and lightweight architectures for reconfigurable hardware, efficient coding styles for ARM's Cortex-M4 microcontroller, and novel high-performance software implementations that fully employ vector instructions. Finally, we conclude that McEliece encryption in combination with QC-MDPC codes not only enables high-performance implementations but also allows for lightweight designs on a wide range of different platforms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "44", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Massolino:2015:OSC, author = "Pedro Maat C. Massolino and Paulo S. L. M. Barreto and Wilson V. Ruggiero", title = "Optimized and Scalable Co-Processor for {McEliece} with Binary {Goppa} Codes", journal = j-TECS, volume = "14", number = "3", pages = "45:1--45:??", month = apr, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2736284", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Apr 21 17:21:32 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Asymmetric cryptographic primitives are essential to enable secure communications in public networks or public mediums. 
Such primitives can be deployed as software libraries or hardware co-processors, the latter being more commonly employed in systems on chip (SoC) scenarios, embedded devices, or application-specific servers. Unfortunately, the most commonly available solutions, based on RSA or elliptic curve cryptography (ECC), are highly processing intensive due to the underlying extended-precision modular arithmetic. Consequently, they are not available on highly constrained platforms. Aiming to tackle this issue, we here investigate an alternative asymmetric encryption scheme that relies on lightweight arithmetic: McEliece. This scheme is especially appealing because, being based on error correction codes, it displays a simpler arithmetic and leads to better performance when compared to RSA or ECC. To evaluate the implementation of this scheme in hardware, we propose and analyze a flexible architecture whose security level and time versus area usage characteristics can be reconfigured as desired. The proposed architecture is suitable to all usual security levels, ranging from 80 to 256 bits. It is also very efficient, being able to perform data decryption with binary Goppa codes in 56$ \mu $s with 3,402 slices on a Xilinx Spartan-3AN FPGA, whereas the best-known result in the literature for the same FPGA is 115$ \mu $s with 7,331 slices. Alternatively, the architecture can operate with quasi-dyadic Goppa (QD-Goppa) codes, which involves smaller keys than traditional binary Goppa codes. In the latter case, for an 80-bit security level, the decryption operation can take from 1.1ms with 1,129 slices to 68$ \mu $s with 8,268 slices. By choosing a more hardware-friendly decoding algorithm, focusing hardware resources on most bottleneck operations and sharing hardware resource for two different algorithms, better results than those in the literature were obtained.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "45", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Millo:2015:MAD, author = "Jean-Vivien Millo and Emilien Kofman and Robert {De Simone}", title = "Modeling and Analyzing Dataflow Applications on {NoC}-Based Many-Core Architectures", journal = j-TECS, volume = "14", number = "3", pages = "46:1--46:??", month = apr, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700081", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Apr 21 17:21:32 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The advent of chip-level parallel architectures prompted a renewal of interest into dataflow process networks. The trend is to model an application independently from the architecture, then the model is morphed to best fit the target architecture. One downplayed aspect is the mapping of communications through the on-chip topology. The cost of such communications is often prevalent with regard to computations. This article establishes a dataflow process network called K-periodically Routed Graph (KRG), which serves the role of representing the various routing decisions during the transformation of a genuine application into an architecture-aware version for this application.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "46", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Davis:2015:GPM, author = "Robert I. Davis and Alan Burns and Jose Marinho and Vincent Nelis and Stefan M. 
Petters and Marko Bertogna", title = "Global and Partitioned Multiprocessor Fixed Priority Scheduling with Deferred Preemption", journal = j-TECS, volume = "14", number = "3", pages = "47:1--47:??", month = apr, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2739954", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Apr 21 17:21:32 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article introduces schedulability analysis for Global Fixed Priority Scheduling with Deferred Preemption (gFPDS) for homogeneous multiprocessor systems. gFPDS is a superset of Global Fixed Priority Preemptive Scheduling (gFPPS) and Global Fixed Priority Nonpreemptive Scheduling (gFPNS). We show how schedulability can be improved using gFPDS via appropriate choice of priority assignment and final nonpreemptive region lengths, and provide algorithms that optimize schedulability in this way. Via an experimental evaluation we compare the performance of multiprocessor scheduling using global approaches: gFPDS, gFPPS, and gFPNS, and also partitioned approaches employing FPDS, FPPS, and FPNS on each processor.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "47", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Tilli:2015:GCR, author = "Andrea Tilli and Andrea Bartolini and Matteo Cacciari and Luca Benini", title = "Guaranteed Computational Resprinting via Model-Predictive Control", journal = j-TECS, volume = "14", number = "3", pages = "48:1--48:??", month = apr, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2724715", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Apr 21 17:21:32 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Today and future many-core systems are facing the utilization wall and dark silicon problems, for which not all the processing engines can be powered at the same time as this will lead to a power consumption higher than the Total Design Power (TDP) budget. Recently, computational sprinting approaches addressed the problem by exploiting the intrinsic thermal capacitance of the chip and the properties of common applications, which require intense, but temporary, use of resources. The thermal capacitance, possibly augmented with phase change materials, enables the temporary activation of all the resources simultaneously, although they largely exceed the steady-state thermal design power. In this article, we present an innovative and low-overhead hierarchical model-predictive controller for managing thermally safe sprinting with predictable resprinting rate, which ensures the correct execution of mixed-criticality tasks. Well-targeted simulations, also based on real workload benchmarks, show the applicability and the effectiveness of our solution.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "48", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Sayyah:2015:VPB, author = "Parinaz Sayyah and Mihai T. Lazarescu and Sara Bocchio and Emad Ebeid and Gianluca Palermo and Davide Quaglia and Alberto Rosti and Luciano Lavagno", title = "Virtual Platform-Based Design Space Exploration of Power-Efficient Distributed Embedded Applications", journal = j-TECS, volume = "14", number = "3", pages = "49:1--49:??", month = may, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2723161", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Dec 9 08:08:56 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Networked embedded systems are essential building blocks of a broad variety of distributed applications ranging from agriculture to industrial automation to healthcare and more. These often require specific energy optimizations to increase the battery lifetime or to operate using energy harvested from the environment. Since a dominant portion of power consumption is determined and managed by software, the software development process must have access to the sophisticated power management mechanisms provided by state-of-the-art hardware platforms to achieve the best tradeoff between system availability and reactivity. Furthermore, internode communications must be considered to properly assess the energy consumption. This article describes a design flow based on a SystemC virtual platform including both accurate power models of the hardware components and a fast abstract model of the wireless network. The platform allows both model-driven design of the application and the exploration of power and network management alternatives. These can be evaluated in different network scenarios, allowing one to exploit power optimization strategies without requiring expensive field trials. 
The effectiveness of the approach is demonstrated via experiments on a wireless body area network application.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "49", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Tamas-Selicean:2015:DOM, author = "Domitian Tamas-Selicean and Paul Pop", title = "Design Optimization of Mixed-Criticality Real-Time Embedded Systems", journal = j-TECS, volume = "14", number = "3", pages = "50:1--50:??", month = may, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700103", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Dec 9 08:08:56 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this article, we are interested in implementing mixed-criticality real-time embedded applications on a given heterogeneous distributed architecture. Applications have different criticality levels, captured by their Safety-Integrity Level (SIL), and are scheduled using static-cyclic scheduling. According to certification standards, mixed-criticality tasks can be integrated onto the same architecture only if there is enough spatial and temporal separation among them. We consider that the separation is provided by partitioning, such that applications run in separate partitions, and each partition is allocated several time slots on a processor. Tasks of different SILs can share a partition only if they are all elevated to the highest SIL among them. Such elevation leads to increased development costs, which increase dramatically with each SIL. Tasks of higher SILs can be decomposed into redundant structures of lower SIL tasks. 
We are interested to determine (i) the mapping of tasks to processors, (ii) the assignment of tasks to partitions, (iii) the decomposition of tasks into redundant lower SIL tasks, (iv) the sequence and size of the partition time slots on each processor, and (v) the schedule tables, such that all the applications are schedulable and the development costs are minimized. We have proposed a Tabu Search-based approach to solve this optimization problem. The proposed algorithm has been evaluated using several synthetic and real-life benchmarks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "50", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Papagiannopoulou:2015:EEH, author = "Dimitra Papagiannopoulou and Giuseppe Capodanno and Tali Moreshet and Maurice Herlihy and R. Iris Bahar", title = "Energy-Efficient and High-Performance Lock Speculation Hardware for Embedded Multicore Systems", journal = j-TECS, volume = "14", number = "3", pages = "51:1--51:??", month = may, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700097", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Dec 9 08:08:56 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Embedded systems are becoming increasingly common in everyday life and like their general-purpose counterparts, they have shifted towards shared memory multicore architectures. However, they are much more resource constrained, and as they often run on batteries, energy efficiency becomes critically important. In such systems, achieving high concurrency is a key demand for delivering satisfactory performance at low energy cost. In order to achieve this high concurrency, consistency across the shared memory hierarchy must be accomplished in a cost-effective manner in terms of performance, energy, and implementation complexity. 
In this article, we propose Embedded-Spec, a hardware solution for supporting transparent lock speculation, without the requirement for special supporting instructions. Using this approach, we evaluate the energy consumption and performance of a suite of benchmarks, exploring a range of contention management and retry policies. We conclude that for resource-constrained platforms, lock speculation can provide real benefits in terms of improved concurrency and energy efficiency, as long as the underlying hardware support is carefully configured.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "51", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Santinelli:2015:PCP, author = "Luca Santinelli and Liliana Cucu-Grosjean", title = "A Probabilistic Calculus for Probabilistic Real-Time Systems", journal = j-TECS, volume = "14", number = "3", pages = "52:1--52:??", month = may, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2717113", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Dec 9 08:08:56 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Challenges within real-time research are mostly in terms of modeling and analyzing the complexity of actual real-time embedded systems. Probabilities are effective in both modeling and analyzing embedded systems by increasing the amount of information for the description of elements composing the system. Elements are tasks and applications that need resources, schedulers that execute tasks, and resource provisioning that satisfies the resource demand. In this work, we present a model that considers component-based real-time systems with component interfaces able to abstract both the functional and nonfunctional requirements of components and the system. 
Our model faces probabilities and probabilistic real-time systems unifying in the same framework probabilistic scheduling techniques and compositional guarantees varying from soft to hard real time. We provide an algebra to work with the probabilistic notation developed and form an analysis in terms of sufficient probabilistic schedulability conditions for task systems with either preemptive fixed-priority or earliest deadline first scheduling paradigms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "52", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Anand:2015:ICL, author = "Kapil Anand and Rajeev Barua", title = "Instruction-Cache Locking for Improving Embedded Systems Performance", journal = j-TECS, volume = "14", number = "3", pages = "53:1--53:??", month = apr, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700100", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Apr 21 17:21:32 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Cache memories in embedded systems play an important role in reducing the execution time of applications. Various kinds of extensions have been added to cache hardware to enable software involvement in replacement decisions, improving the runtime over a purely hardware-managed cache. Novel embedded systems, such as Intel's XScale and ARM Cortex processors, facilitate locking one or more lines in cache; this feature is called cache locking. We present a method for instruction-cache locking that is able to reduce the average-case runtime of a program. We demonstrate that the optimal solution for instruction cache locking can be obtained in polynomial time. 
However, a fundamental lack of correlation between cache hardware and software program points renders such optimal solutions impractical. Instead, we propose two practical heuristics-based approaches to achieve cache locking. First, we present a static mechanism for locking the cache, in which the locked contents of the cache are kept fixed over the execution of the program. Next, we present a dynamic mechanism that accounts for changing program requirements at runtime. We devise a cost--benefit model to discover the memory addresses that should be locked in the cache. We implement our scheme inside a binary rewriter, widening the applicability of our scheme to binaries compiled using any compiler. Results obtained on a suite of MiBench benchmarks show that our static mechanism results in 20\% improvement in the instruction-cache miss rate on average and up to 18\% improvement in the execution time on average for applications having instruction accesses as a bottleneck, compared to no cache locking. The dynamic mechanism improves the cache miss rate by 35\% on average and execution time by 32\% on instruction-cache-constrained applications.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "53", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Cooke:2015:FSM, author = "Patrick Cooke and Lu Hao and Greg Stitt", title = "Finite-State-Machine Overlay Architectures for Fast {FPGA} Compilation and Application Portability", journal = j-TECS, volume = "14", number = "3", pages = "54:1--54:??", month = apr, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700082", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Apr 21 17:21:32 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Despite significant advantages, wider usage of field-programmable gate arrays (FPGAs) has been limited by lengthy compilation and a lack of portability. Virtual-architecture overlays have partially addressed these problems, but previous work focuses mainly on heavily pipelined applications with minimal control requirements. We expand previous work by enabling more flexible control via overlay architectures for finite-state machines. Although not appropriate for control-intensive circuits, the presented architectures reduced compilation times of control changes in a convolution case study from 7 hours to less than 1 second, with no performance overhead and an area overhead of 0.2\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "54", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Watkins:2015:UNT, author = "Lanier Watkins and William H. 
Robinson and Raheem Beyah", title = "Using Network Traffic to Infer Hardware State: a Kernel-Level Investigation", journal = j-TECS, volume = "14", number = "3", pages = "55:1--55:??", month = may, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700094", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Dec 9 08:08:56 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/bitcoin.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this article, we illustrate that the boundary of a general-purpose node can be extended into the network by extracting information from network traffic generated by that general-purpose node to infer the state of its hardware components. This information is represented in a delay signature latent within the network traffic. In contrast, the traditional approach to determine the internal state of a node's resources meant that a software application with internal processes had to be resident on the node. The aforementioned delay signature is the keystone that provides a correlation between network traffic and the internal state of the source node. We characterize this delay signature by (1) identifying the different types of assembly language instructions that source this delay and (2) describing how architectural techniques, such as instruction pipelining and caching, give rise to this delay signature. In theory, highly utilized nodes (due to multiple threads) will contain excessive context switching and contention for shared resources. One important shared resource is main memory, and excessive use of this resource by applications and internal processes eventually leads to a decrease in cache efficiency that eventually stalls the instruction pipeline. 
Our results support this theory; specifically, we have observed that excessive context switching in active applications increases the effective memory access time and wastes precious CPU cycles, thus adding additional delay to the execution of load, store, and other instructions. Because the operating system (OS) kernel accesses memory to send network packets, the delay signature is induced into network traffic in situations where user-level utilization is high. We demonstrate this theory in two case studies: (1) resource discovery in cluster grids and (2) network-based detection of bitcoin mining on compromised nodes.", acknowledgement = ack-nhfb, acmid = "2700094", ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "55", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", keywords = "LEON4 processor, clusters assembly language instructions, grid computing, passive resource discovery", pagecount = "22", } @Article{Kerrison:2015:EMS, author = "Steve Kerrison and Kerstin Eder", title = "Energy Modeling of Software for a Hardware Multithreaded Embedded Microprocessor", journal = j-TECS, volume = "14", number = "3", pages = "56:1--56:??", month = may, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700104", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Dec 9 08:08:56 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article examines a hardware multithreaded microprocessor and discusses the impact such an architecture has on existing software energy modeling techniques. A framework is constructed for analyzing the energy behavior of the XMOS XS1-L multithreaded processor and a variation on existing software energy models is proposed, based on analysis of collected energy data. 
It is shown that by combining execution statistics with sufficient data on the processor's thread activity and instruction execution costs, a multithreaded software energy model used with Instruction Set Simulation can yield an average error margin of less than 7\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "56", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Cilardo:2015:ECA, author = "Alessandro Cilardo and Edoardo Fusella and Luca Gallo and Antonino Mazzeo", title = "Exploiting Concurrency for the Automated Synthesis of {MPSoC} Interconnects", journal = j-TECS, volume = "14", number = "3", pages = "57:1--57:??", month = may, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700075", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Dec 9 08:08:56 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Multiprocessor Systems-on-Chip (MPSoC) applications can rely today on a very large spectrum of interconnection topologies potentially meeting given communication requirements, determining various trade-offs between cost and performance. Building interconnects that enable concurrent communication tasks introduces decisive opportunities for reducing the overall communication latency. This work identifies three levels of parallelism at the interconnect level: global parallelism across different independent domains; local or intradomain parallelism, relying on inherently concurrent interconnect components such as crossbars; and interdomain parallelism, where multiple concurrent paths across different local domains are exploited. We propose an automated methodology to search the design space, aimed at maximizing the exploitation of these forms of parallelism. 
The approach also takes into consideration possible dependencies between communication tasks, which further constrains the design space, making the identification of a feasible solution more challenging. By jointly solving a scheduling and interconnect synthesis problem, the methodology turns the description of the application communication requirements, including data dependencies, into an on-chip synthesizable interconnection structure along with a communication schedule satisfying given area constraints. The article thoroughly describes the formalisms and the methodology used to derive such optimized heterogeneous topologies. It also discusses some case studies emphasizing the impact of the proposed approach and highlighting the essential differences with a few other solutions presented in the technical literature.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "57", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Geeraerts:2015:VCA, author = "Gilles Geeraerts and Alexander Heu{\ss}ner and Jean-Fran{\c{c}}ois Raskin", title = "On the Verification of Concurrent, Asynchronous Programs with Waiting Queues", journal = j-TECS, volume = "14", number = "3", pages = "58:1--58:??", month = may, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700072", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Dec 9 08:08:56 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Recently, new libraries, such as Grand Central Dispatch (GCD), have been proposed to directly harness the power of multicore platforms and to make the development of concurrent software more accessible to software engineers. When using such a library, the programmer writes so-called blocks, which are chunks of code, and dispatches them using synchronous or asynchronous calls to several types of waiting queues. 
A scheduler is then responsible for dispatching those blocks among the available cores. Blocks can synchronize via a global memory. In this article, we propose Queue-Dispatch Asynchronous Systems as a mathematical model that faithfully formalizes the synchronization mechanisms and behavior of the scheduler in those systems. We study in detail their relationships to classical formalisms such as pushdown systems, Petri nets, Fifo systems, and counter systems. Our main technical contributions are precise worst-case complexity results for the Parikh coverability problem and the termination problem for several subclasses of our model. We also consider an extension of Qdas with a fork-join mechanism. Adding fork-join to any of the subclasses that we have identified leads to undecidability of the coverability problem. This motivates the study of over-approximations. Finally, we consider handmade abstractions as a practical way of verifying programs that cannot be faithfully modeled by decidable subclasses of Qdas.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "58", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Huang:2015:COM, author = "Kai Huang and Min Yu and Rongjie Yan and Xiaomeng Zhang and Xiaolang Yan and Lisane Brisolara and Ahmed Amine Jerraya and Jiong Feng", title = "Communication Optimizations for Multithreaded Code Generation from {Simulink} Models", journal = j-TECS, volume = "14", number = "3", pages = "59:1--59:??", month = may, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2644811", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Dec 9 08:08:56 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Communication frequency is increasing with the growing complexity of emerging embedded applications and the number of processors in the implemented multiprocessor SoC architectures. In this article, we consider the issue of communication cost reduction during multithreaded code generation from partitioned Simulink models to help designers in code optimization to improve system performance. We first propose a technique combining message aggregation and communication pipeline methods, which groups communications with the same destinations and sources and parallelizes communication and computation tasks. We also present a method to apply static analysis and dynamic emulation for efficient communication buffer allocation to further reduce synchronization cost and increase processor utilization. The existing cyclic dependency in the mapped model may hinder the effectiveness of the two techniques. We further propose a set of optimizations involving repartition with strongly connected threads to maximize the degree of communication reduction and preprocessing strategies with available delays in the model to reduce the number of communication channels that cannot be optimized. 
Experimental results demonstrate the advantages of the proposed optimizations with 11--143\% throughput improvement.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "59", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mathew:2015:NMB, author = "Jimson Mathew and Rajat Subhra Chakraborty and Durga Prasad Sahoo and Yuanfan Yang and Dhiraj K. Pradhan", title = "A Novel Memristor-Based Hardware Security Primitive", journal = j-TECS, volume = "14", number = "3", pages = "60:1--60:??", month = apr, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2736285", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Apr 21 17:21:32 MDT 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Memristor is an exciting new addition to the repertoire of fundamental circuit elements. Alternatives to many security protocols originally employing traditional mathematical cryptography involve novel hardware security primitives, such as Physically Unclonable Functions (PUFs). In this article, we propose a novel hybrid memristor-CMOS PUF circuit and demonstrate its suitability through extensive simulations of environmental and process variation effects. The proposed PUF circuit has substantially less hardware overhead than previously proposed memristor-based PUF circuits while being inherently resistant to machine learning-based modeling attacks because of challenge-dependent delays of the memristor stages. The proposed PUF can be conveniently used in many security applications and protocols based on hardware-intrinsic security.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "60", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Shukla:2015:EBD, author = "Sandeep K. Shukla", title = "Editorial: Big Data, {Internet of Things}, Cybersecurity --- A New Trinity of Embedded Systems Research", journal = j-TECS, volume = "14", number = "4", pages = "61:1--61:??", month = dec, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2820608", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Dec 8 17:53:22 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "61", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Barkaoui:2015:GES, author = "Kamel Barkaoui and Luca Bernardinello and Andrey Mokhov", title = "Guest Editorial for Special Issue Application of Concurrency to System Design", journal = j-TECS, volume = "14", number = "4", pages = "62:1--62:??", month = dec, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2809925", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Dec 8 17:53:22 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "62", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Furbach:2015:MMA, author = "Florian Furbach and Roland Meyer and Klaus Schneider and Maximilian Senftleben", title = "Memory-Model-Aware Testing: a Unified Complexity Analysis", journal = j-TECS, volume = "14", number = "4", pages = "63:1--63:??", month = dec, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2753761", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Dec 8 17:53:22 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "To improve the performance of the memory system, multiprocessors implement weak memory consistency models. Weak memory models admit different views of the processes on their load and store instructions, thus allowing for computations that are not sequentially consistent. Program analyses have to take into account the memory model of the targeted hardware. This is challenging because numerous memory models have been developed, and every memory model requires its own analysis. In this article, we study a prominent approach to program analysis: testing. The testing problem takes as input sequences of operations, one for each process in the concurrent program. The task is to check whether these sequences can be interleaved to an execution of the entire program that respects the constraints of a memory model under consideration. We determine the complexity of the testing problem for most of the known memory models. Moreover, we study the impact on the complexity of parameters, such as the number of concurrent processes, the length of their executions, and the number of shared variables. What differentiates our contribution from related results is a uniform approach that avoids considering each memory model on its own. We build upon work of Steinke and Nutt. 
They showed that the existing memory models form a hierarchy where one model is called weaker than another one if it includes the latter's behavior. Using the Steinke-Nutt hierarchy, we develop three general concepts that allow us to quickly determine the complexity of a testing problem. First, we generalize the technique of problem reductions from complexity theory. So-called range reductions propagate hardness results between memory models, and we apply them to establish NP lower bounds for the stronger memory models. Second, for the weaker models, we present polynomial-time testing algorithms that are inspired by determinization algorithms for automata. Finally, we describe a single SAT encoding of the testing problem that works for all memory models in the Steinke-Nutt hierarchy to prove their membership in NP. Our results are general enough to carry over to future weak memory models. Moreover, they show that SAT solvers are adequate tools for testing.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "63", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Knapik:2015:ASB, author = "Michal Knapik and Artur Meski and Wojciech Penczek", title = "Action Synthesis for Branching Time Logic: Theory and Applications", journal = j-TECS, volume = "14", number = "4", pages = "64:1--64:??", month = dec, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2746337", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Dec 8 17:53:22 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The article introduces a parametric extension of Action-Restricted Computation Tree Logic called pmARCTL. A symbolic fixed-point algorithm providing a solution to the exhaustive parameter synthesis problem is proposed. 
The parametric approach allows for an in-depth system analysis and synthesis of the correct parameter values. The time complexity of the problem and the algorithm is provided. An existential fragment of pmARCTL (pmEARCTL) is identified, in which all of the solutions can be generated from a minimal and unique base. A method for computing this base using symbolic methods is provided. The prototype tool SPATULA implementing the algorithm is applied to the analysis of three benchmarks: faulty Train-Gate-Controller, Peterson's mutual exclusion protocol, and a generic pipeline processing network. The experimental results show efficiency and scalability of our approach compared to the naive solution to the problem.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "64", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Siirtola:2015:PMI, author = "Antti Siirtola and Keijo Heljanko", title = "Parametrised Modal Interface Automata", journal = j-TECS, volume = "14", number = "4", pages = "65:1--65:??", month = dec, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2776892", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Dec 8 17:53:22 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Interface theories (ITs) enable us to analyse the compatibility of interfaces and refine them while preserving their compatibility. However, most ITs are for finite state interfaces, whereas computing systems are often parametrised involving components, the number of which cannot be fixed. We present, to our knowledge, the first IT that allows us to specify a parametric number of interfaces. Moreover, we provide a fully algorithmic procedure, implemented in a tool, for checking the compatibility of and refinement between parametrised interfaces. 
Finally, we show that the restrictions of the technique are necessary; removing any of them renders the refinement checking problem undecidable.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "65", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Cotard:2015:SHR, author = "Sylvain Cotard and Audrey Queudet and Jean-Luc B{\'e}chennec and S{\'e}bastien Faucou and Yvon Trinquet", title = "{STM--HRT}: a Robust and Wait-Free {STM} for Hard Real-Time Multicore Embedded Systems", journal = j-TECS, volume = "14", number = "4", pages = "66:1--66:??", month = dec, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2786979", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Dec 8 17:53:22 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article introduces STM-HRT, a nonblocking wait-free software transactional memory (STM) for hard real-time (HRT) multicore embedded systems. Resource access control in HRT systems is usually implemented with lock-based synchronization. However, these mechanisms may lead to deadlocks or starvations and do not scale well with the number of cores. Most existing nonblocking STM are not suitable for HRT systems, because it is not possible to find an upper bound of the execution time for each task. In this article, we show how STM-HRT can be a robust solution for resource sharing in HRT multicore systems. We provide a detailed description of STM-HRT architecture. We propose a set of arguments to establish the functional correctness of its concurrency control protocol. Finally, as part of a real-time analysis, we derive upper bounds on the computations required to access shared data under STM-HRT.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "66", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Bujtor:2015:FSM, author = "Ferenc Bujtor and Walter Vogler", title = "Failure Semantics for Modal Transition Systems", journal = j-TECS, volume = "14", number = "4", pages = "67:1--67:??", month = dec, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2746336", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Dec 8 17:53:22 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "With the aim to preserve deadlock freedom, we define a new refinement preorder for modal transition systems (MTSs), using an MTS-specific variant of testing inspired by De Nicola and Hennessy. We characterize this refinement with a kind of failure semantics and show that it ``supports itself,'' for example, in the sense of thoroughness---in contrast to standard modal refinements. We present a conjunction operator with respect to our new refinement, which is quite different from existing ones. It always returns an MTS---again in contrast to the case of modal refinement. Finally, we also consider De Nicola's and Hennessy's may- and must-testing, where the latter leads to a semantics that is also compositional for hiding.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "67", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{DeGroote:2015:IAC, author = "Robert {De Groote} and Philip K. F. H{\"o}lzenspies and Jan Kuper and Gerard J. M. 
Smit", title = "Incremental Analysis of Cyclo-Static Synchronous Dataflow Graphs", journal = j-TECS, volume = "14", number = "4", pages = "68:1--68:??", month = dec, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2792981", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Dec 8 17:53:22 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this article, we present a mathematical characterisation of admissible schedules of cyclo-static dataflow (csdf) graphs. We demonstrate how algebraic manipulation of this characterization is related to unfolding csdf actors and how this manipulation allows csdf graphs to be transformed into mrsdf graphs that are equivalent, in the sense that they admit the same set of schedules. The presented transformation allows the rich set of existing analysis techniques for mrsdf graphs to be applied to csdf graphs and generalizes the well-known transformations from csdf and mrsdf into hsdf. Moreover, it gives rise to an incremental approach to the analysis of csdf graphs, where approximate analyses are combined with exact transformations. We show the applicability of this incremental approach by demonstrating its effectiveness on the problem of optimizing buffer sizes under a throughput constraint.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "68", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Germanos:2015:DUW, author = "Vasileios Germanos and Stefan Haar and Victor Khomenko and Stefan Schwoon", title = "Diagnosability under Weak Fairness", journal = j-TECS, volume = "14", number = "4", pages = "69:1--69:??", month = dec, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2832910", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Dec 8 17:53:22 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In partially observed Petri nets, diagnosis is the task of detecting whether the given sequence of observed labels indicates that some unobservable fault has occurred. Diagnosability is an associated property of the Petri net, stating that in any possible execution, an occurrence of a fault can eventually be diagnosed. In this article, we consider diagnosability under the weak fairness (WF) assumption, which intuitively states that no transition from a given set can stay enabled forever---it must eventually either fire or be disabled. We show that a previous approach to WF-diagnosability in the literature has a major flaw and present a corrected notion. Moreover, we present an efficient method for verifying WF-diagnosability based on a reduction to LTL-X model checking. An important advantage of this method is that the LTL-X formula is fixed---in particular, the WF assumption does not have to be expressed as a part of it (which would make the formula length proportional to the size of the specification), but rather the ability of existing model checkers to handle weak fairness directly is exploited.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "69", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Pan:2015:SGP, author = "Gung-Yu Pan and Jed Yang and Jing-Yang Jou and Bo-Cheng Charles Lai", title = "Scalable Global Power Management Policy Based on Combinatorial Optimization for Multiprocessors", journal = j-TECS, volume = "14", number = "4", pages = "70:1--70:??", month = dec, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2811404", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Dec 8 17:53:22 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Multiprocessors have become the main architecture trend in modern systems due to the superior performance; nevertheless, the power consumption remains a critical challenge. Global power management (GPM) aims at dynamically finding the power state combination that satisfies the power budget constraint while maximizing the overall performance (or vice versa). Due to the increasing number of cores in a multiprocessor system, the scalability of GPM policies has become critical when searching satisfactory state combinations within acceptable time. This article proposes a highly scalable policy based on combinatorial optimization with theoretical proofs, whereas previous works take exhaustive search or heuristic methods. The proposed policy first applies an optimum algorithm to construct a state combination table in pseudo-polynomial time using dynamic programming. Then, the state combination is assigned to cores with minimum transition cost in linear time by mapping to the network flow problem. Simulation results show that the proposed policy achieves better system performance for any given power budget when compared to the state-of-the-art heuristic. 
Furthermore, the proposed policy demonstrates its prominent scalability with 125 times faster policy runtime for 512 cores.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "70", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lu:2015:ECA, author = "Jing Lu and Ke Bai and Aviral Shrivastava", title = "Efficient Code Assignment Techniques for Local Memory on Software Managed Multicores", journal = j-TECS, volume = "14", number = "4", pages = "71:1--71:??", month = dec, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2738039", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Dec 8 17:53:22 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Scaling the memory hierarchy is a major challenge when we scale the number of cores in a multicore processor. Software Managed Multicore (SMM) architectures come up as one of the promising solutions. In an SMM architecture, there are no caches, and each core has only a local scratchpad memory [Banakar et al. 2002]. As the local memory usually is small, large applications cannot be directly executed on it. Code and data of the task mapped to each core need to be managed between global memory and local memory. This article solves the problem of efficiently managing code on an SMM architecture. The primary requirement of generating efficient code assignments is a correct management cost model. In this article, we address this problem by proposing a cost calculation graph. In addition, we develop two heuristics CMSM (Code Mapping for Software Managed multicores) and CMSM\_advanced that result in efficient code management execution on the local scratchpad memory. Experimental results collected after executing applications from the MiBench suite [Guthaus et al. 
2001] demonstrate that merely by adopting the correct management cost calculation, even using previous code assignment schemes, we can improve performance by an average of 12\%. Combining the correct management cost model and a more optimized code mapping algorithm together, our heuristics can reduce runtime in more than 80\% of the cases, and by up to 20\% on our set of benchmarks, compared to the state-of-the-art code assignment approach [Jung et al. 2010]. When compared with Instruction-level Parallelism (ILP) results, CMSM\_advanced performs an average of 5\% worse. We also simulate the benchmarks on a cache-based system, and find that the code management overhead on SMM core with our code management is much less than memory latency of a cache-based system.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "71", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kamal:2015:OHC, author = "Mehdi Kamal and Ali Afzali-Kusha and Saeed Safari and Massoud Pedram", title = "{OPLE}: a Heuristic Custom Instruction Selection Algorithm Based on Partitioning and Local Exploration of Application Dataflow Graphs", journal = j-TECS, volume = "14", number = "4", pages = "72:1--72:??", month = dec, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2764458", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Dec 8 17:53:22 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this article, a heuristic custom instruction (CI) selection algorithm is presented. The proposed algorithm, which is called OPLE for ``Optimization based on Partitioning and Local Exploration,'' uses a combination of greedy and optimal optimization methods. It searches for the near-optimal solution by reducing the search space based on partitioning the identified CI set. 
The partitioning of the identified set guarantees the success of the algorithm independent of the size of the identified set. First, the algorithm finds the near-optimal CIs from the candidate CIs for each part. Next, the suggested CIs from different parts are combined to determine the final selected CI set. To improve the set of the selected CIs, the solution is evolved by calling the algorithm iteratively. The efficacy of the algorithm is assessed by comparing its performance to those of optimal and nonoptimal methods. A comparative study is performed for a number of benchmarks under different area budgets and I/O constraints. The results reveal higher speedups for the OPLE algorithm, especially for larger identified candidate sets and/or small area budgets compared to those of the nonoptimal solutions. Compared to the nonoptimal techniques, the proposed algorithm provides 30\% higher speedup improvement on average. The maximum improvement is 117\%. The results also demonstrate that in many cases OPLE is able to find the optimal solution.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "72", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Palossi:2015:CDP, author = "Daniele Palossi and Martino Ruggiero and Luca Benini", title = "{$3$D} {CV} Descriptor on Parallel Heterogeneous Platforms", journal = j-TECS, volume = "14", number = "4", pages = "73:1--73:??", month = dec, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2733377", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Dec 8 17:53:22 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Embedded three-dimensional (3D) Computer Vision (CV) is considered a technology enabler for future consumer applications, attracting a wide interest in academia and industry. 
However, 3D CV processing is a computation-intensive task. Its high computational cost is directly related to the processing of 3D point clouds, with the 3D descriptor computation representing one of the main bottlenecks. Understanding the main computational challenges of 3D CV applications, as well as the key characteristics, enabling features, and limitations of current computing platforms, is clearly strategic to identify the directions of evolution for future embedded processing systems targeting 3D CV. In this work, an innovative and complex 3D descriptor (called SHOT) has been ported on a high-end and an embedded computing platform. The high-end system is composed by a high-performance Intel CPU coupled with a Nvidia GPU. The embedded platform is, instead, composed by an ARM-based processor, coupled with the STHORM accelerator. STHORM is a many-core low-power accelerator developed by ST Microelectronics, featuring up to 64 computational units. The SHOT descriptor has been parallelized using the OpenCL programming model for both platforms. Finally, we have performed an in-depth performance comparison and analysis between general-purpose processors and accelerators in both high-end and embedded domains, discussing and highlighting the main differences in the Hardware/Software (HW/SW) design methodologies and approaches between high-end and embedded systems targeting 3D CV applications.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "73", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Li:2015:CIB, author = "Guohui Li and Yi Zhang and Jianjun Li", title = "{Crenel}-Interval-Based Dynamic Power Management for Periodic Real-Time Systems", journal = j-TECS, volume = "14", number = "4", pages = "74:1--74:??", month = dec, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2744197", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Dec 8 17:53:22 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In order to save the energy consumption of real-time embedded systems, the integration of Dynamic Voltage and Frequency Scaling (DVFS) and Device Power Management (DPM) techniques has been well studied. In this article, we propose a new energy management scheme for periodic real-time tasks with implicit deadlines. We mainly focus on the DPM part by presenting a novel approach to the real-time DPM problem. Specifically, we first identify intervals for each device, which we refer to as Crenel Intervals, by partitioning the Earliest Deadline First (EDF) schedule of the tasks that need to access the device into successive intervals. The principle for identifying Crenel Intervals is that for each task, there is only one deadline located in each Crenel Interval. Next, targeting at a single device model and a multiple device model, respectively, we propose the CI-EDF and CI-EDF$^m$ algorithms to schedule task instances in each Crenel Interval, so as to form long and continuous slacks in each Crenel Interval but without jeopardizing any task deadlines. Then, the slack in the Crenel Intervals can be utilized to perform not only DPM, but also DVFS. 
The experimental results show that our approaches can achieve considerably more energy savings than existing techniques with comparable quality.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "74", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mihajlovic:2015:AAR, author = "Bojan Mihajlovi{\'c} and Zeljko Zili{\'c} and Warren J. Gross", title = "Architecture-Aware Real-Time Compression of Execution Traces", journal = j-TECS, volume = "14", number = "4", pages = "75:1--75:??", month = dec, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2766449", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Dec 8 17:53:22 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In recent years, on-chip trace generation has been recognized as a solution to the debugging of increasingly complex software. An execution trace can be seen as the most fundamentally useful type of trace, allowing the execution path of software to be determined post hoc. However, the bandwidth required to output such a trace can be excessive. Our architecture-aware trace compression (AATC) scheme adds an on-chip branch predictor and branch target buffer to reduce the volume of execution trace data in real time through on-chip compression. Novel redundancy reduction strategies are employed, most notably in exploiting the widespread use of linked branches and the compiler-driven movement of return addresses between link register, stack, and program counter. In doing so, the volume of branch target addresses is reduced by 52\%, whereas other algorithmic improvements further decrease trace volume. An analysis of spatial and temporal redundancy in the trace stream allows a comparison of encoding strategies to be made for systematically increasing compression performance. 
A combination of differential, Fibonacci, VarLen, and Move-to-Front encodings are chosen to produce two compressor variants: a performance-focused xAATC that encodes 56.5 instructions/bit using 24,133 gates and an area-efficient fAATC that encodes 48.1 instructions/bit using only 9,854 gates.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "75", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Bak:2015:SPD, author = "Stanley Bak and Zhenqi Huang and Fardin Abdi Taghi Abad and Marco Caccamo", title = "Safety and Progress for Distributed Cyber-Physical Systems with Unreliable Communication", journal = j-TECS, volume = "14", number = "4", pages = "76:1--76:??", month = dec, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2739046", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Dec 8 17:53:22 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Cyber-physical systems (CPSs) may interact and manipulate objects in the physical world, and therefore formal guarantees about their behavior are strongly desired. Static-time proofs of safety invariants, however, may be intractable for systems with distributed physical-world interactions. This is further complicated when realistic communication models are considered, for which there may not be bounds on message delays, or even when considering that messages will eventually reach their destination. In this work, we address the challenge of proving safety and progress in distributed CPSs communicating over an unreliable communication layer. We show that for this type of communication model, system safety is closely related to the results of a hybrid system's reachability computation, which can be computed at runtime. 
However, since computing reachability at runtime may be computationally intensive, we provide an approach that moves significant parts of the computation to design time. This approach is demonstrated with a case study of a simulation of multiple vehicles moving within a shared environment.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "76", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Catania:2015:PSR, author = "Vincenzo Catania and Andrea Araldo and Davide Patti", title = "Parameter Space Representation of {Pareto} Front to Explore Hardware--Software Dependencies", journal = j-TECS, volume = "14", number = "4", pages = "77:1--77:??", month = dec, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2764457", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Dec 8 17:53:22 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Embedded systems design requires conflicting objectives to be optimized with an appropriate choice of hardware-software parameters. A simulation campaign can guide the design in finding the best trade-offs, but due to the big number of possible configurations, it is often infeasible to simulate them all. For these reasons, design space exploration algorithms aim at finding near-optimal system configurations by simulating only a subset of them. In this work, we present PS, a new multiobjective optimization algorithm, and evaluate it in the context of the embedded system design. The basic idea is to recognize interesting regions---that is, regions of the configuration space that provide better configurations with respect to other ones. PS evaluates more configurations in the interesting regions while less thoroughly exploring the rest of the configuration space. 
After a detailed formal description of the algorithm and the underlying concepts, we show a case study involving the hardware/software exploration of a VLIW architecture. Qualitative and quantitative comparisons of PS against a well-known multiobjective genetic approach demonstrate that while not outperforming it in terms of Pareto dominance, the proposed approach can balance the uniformity and granularity qualities of the solutions found, obtaining more extended Pareto fronts that provide a wider view of the potentiality of the designed device. Therefore, PS represents a further valid choice for the designer when objective constraints allow it.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "77", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Matthews:2015:PTS, author = "Adam Matthews and Stanislav Bobovych and Nilanjan Banerjee and James P. Parkerson and Ryan Robucci and Chintan Patel", title = "{Perpetuu}: a Tiered Solar-powered {GIS} Microserver", journal = j-TECS, volume = "14", number = "4", pages = "78:1--78:??", month = dec, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2767128", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Dec 8 17:53:22 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The aftermath of a natural disaster is characterized by lack of a reliable medium for dissemination of information to survivors. The state-of-the-art emergency response systems rely on satellite radio-enabled devices, but survivors, unlike first responders, do not have access to such devices. To mitigate this problem, we present perpetuu, a solar-powered portable GIS microserver. The microserver node can be deployed in a disaster scene and can serve maps to survivors viewable on browsers of off-the-shelf mobile systems. 
The perpetuu nodes can form a wireless mesh to cover a large geographic region. A key innovation in the design of the perpetuu node is a tiered software and hardware architecture --- the system combines a low-power micro-controller with a high-power micro-processor to provide a large spectrum of power states. perpetuu stays in its lowest power state most of the time, and it can in-vitro detect survivors using Wi-Fi sensing, and consequently wake up the higher-power tier to disseminate high-resolution maps on standard web browsers that provide directions to safe locations. The tiered design leverages hardware-assisted energy measurements and a wakeup controller to balance energy harvested from solar panels with energy consumed by the system. We evaluate perpetuu using measurements from our prototype and trace-based simulations, and show that it can function near-perpetually while serving maps to a large number of survivors.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "78", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Medhat:2015:RMC, author = "Ramy Medhat and Borzoo Bonakdarpour and Deepak Kumar and Sebastian Fischmeister", title = "Runtime Monitoring of Cyber-Physical Systems Under Timing and Memory Constraints", journal = j-TECS, volume = "14", number = "4", pages = "79:1--79:??", month = dec, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2744196", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Dec 8 17:53:22 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The goal of runtime monitoring is to inspect the well-being of a system by employing a monitor process that reads the state of the system during execution and evaluates a set of properties expressed in some specification language. 
The main challenge in runtime monitoring is dealing with the costs imposed in terms of resource utilization. In the context of cyber-physical systems, it is crucial for a software monitoring solution to be time predictable to improve scheduling, as well as support composition of monitoring solutions with an overall predictable behavior. Moreover, a small memory footprint is often required in components of cyber-physical systems, especially in deeply embedded systems. In this article, we propose a novel control-theoretic software monitoring solution for coordinating time predictability and memory utilization in runtime monitoring of systems that interact with the physical world. The controllers attempt to reduce monitoring jitter and maximize memory utilization while simultaneously ensuring the soundness of evaluation of properties. For systems where multiple properties are required to be monitored simultaneously, we construct a buffer sharing mechanism in which controllers dynamically share the memory space to negate the effect of bursts of environment actions, thus reducing jitter due to transient high loads. To validate our design choices, we present three case studies: (1) a Bluetooth mobile payment system, which shows a sporadic rate of events during peak hours; (2) a laser beam stabilizer for target tracking, and (3) a monitoring system for air/fuel ratio in a car engine exhaust and the CAM inlet position in the engine's cylinders. The experimental results of the case studies demonstrate up to 40\% improvement in time predictability of the monitoring solution when compared to a basic event-triggered approach. Moreover, memory utilization reaches an average of 90\% when using our dynamic buffer resizing mechanism.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "79", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Gebotys:2015:SWP, author = "Catherine H. 
Gebotys and Brian A. White", title = "A Sliding Window Phase-Only Correlation Method for Side-Channel Alignment in a {Smartphone}", journal = j-TECS, volume = "14", number = "4", pages = "80:1--80:??", month = dec, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2783441", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Dec 8 17:53:22 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Future wireless embedded devices will be increasingly powerful, supporting many more applications including one of the most crucial, security. Although many embedded devices offer resistance to bus probing attacks due to their compact size and high levels of integration, susceptibility to attacks on their electromagnetic side channel must be analyzed. This side channel is often quite complex to analyze due to the complexities of the embedded device including operating system, interrupts, and so forth. This article presents a new methodology for analyzing a complex system's vulnerability to the EM side channel. The methodology proposes a sliding window phase-only correlation method for aligning electromagnetic emanations from a complex smartphone running native code utilizing an on-chip cache. Unlike previous research, experimental results demonstrate that data written to on-chip cache within an advanced 312 MHz 0.13 $\mu$m processor executing AES can be attacked utilizing this new methodology. Furthermore, for the first time, it has been shown that the point of side-channel attack is not a spike of increased EM but an area of low EM amplitude, unlike what is noted in previous findings. This research is important for advancing side-channel analysis understanding in complex embedded processors and ensuring secure implementations in future embedded ubiquitous devices.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "80", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhao:2015:RSP, author = "Qingling Zhao and Zonghua Gu and Haibo Zeng", title = "Resource Synchronization and Preemption Thresholds Within Mixed-Criticality Scheduling", journal = j-TECS, volume = "14", number = "4", pages = "81:1--81:??", month = dec, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2783440", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Dec 8 17:53:22 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In a mixed-criticality system, multiple tasks with different levels of criticality may coexist on the same hardware platform. The scheduling algorithm EDF-VD (Earliest Deadline First with Virtual Deadlines) has been proposed for mixed-criticality systems, which assumes tasks do not share any common resources. We present MC-SRP (Mixed-Criticality Stack Resource Policy), a resource synchronization protocol for EDF-VD, which allows resource sharing among tasks at the same criticality level and guarantees that each task is blocked at most once in each criticality mode. In addition, we present MC-SRPT (MC-SRP with Thresholds) for reducing the application stack size requirement in resource-constrained embedded systems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "81", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wu:2015:SDE, author = "Ming-Ju Wu and Chun-Jen Tsai", title = "A Storage Device Emulator for System Performance Evaluation", journal = j-TECS, volume = "14", number = "4", pages = "82:1--82:??", month = dec, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2785969", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Dec 8 17:53:22 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The performance and characteristics of the storage devices used in embedded systems can have a great influence on the overall end user experience. When building embedded systems or designing new storage device components, it is important for the designers to be able to evaluate how storage devices of different characteristics will affect the overall system performance. Storage device emulation enables a system's performance to be evaluated with simulated storage devices that are not yet available. In storage device emulation, the emulated storage device appears to the operating system (OS) as a real storage device and its service timings are determined by a disk model, which simulates the behavior of the target storage device. In the conventional storage device emulators, because the OS is running continuously in the real-time domain, the amount of time that the emulators can spend on processing each I/O request is limited by the service time of each corresponding I/O request. This timing constraint can make emulating high-speed storage devices a challenge for the conventional storage device emulators. In this article, we propose an OS state pausing approach to storage device emulation that can overcome the timing constraints faced by the conventional storage device emulators. 
By pausing the state of the OS while the storage device emulator is busy, the proposed emulator can spend as much time as it needs for processing each I/O request without affecting the performance of the emulated storage device as perceived by the OS. This allows the proposed storage device emulator to emulate storage devices that would otherwise be challenging or even impossible for the conventional storage device emulators. In addition, the main task of storage device emulation is offloaded to an external computer to minimize the impact of the emulation workload on the target machine. The proposed storage device emulator is implemented with the Linux OS$^1$ on an embedded system development board. Experimental results show that the full-system performance benchmarks measured with the proposed storage device emulator are within 2\% differences compared to the results of the reference system.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "82", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mirzoyan:2015:MNG, author = "Davit Mirzoyan and Benny Akesson and Sander Stuijk and Kees Goossens", title = "Maximizing the Number of Good Dies for Streaming Applications in {NoC}-Based {MPSoCs} Under Process Variation", journal = j-TECS, volume = "14", number = "4", pages = "83:1--83:??", month = dec, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2785968", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Dec 8 17:53:22 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Scaling CMOS technology into nanometer feature-size nodes has made it practically impossible to precisely control the manufacturing process. This results in variation in the speed and power consumption of a circuit. 
As a solution to process-induced variations, circuits are conventionally implemented with conservative design margins to guarantee the target frequency of each hardware component in manufactured multiprocessor chips. This approach, referred to as worst-case design, results in a considerable circuit upsizing, in turn reducing the number of dies on a wafer. This work deals with the design of real-time systems for streaming applications (e.g., video decoders) constrained by a throughput requirement (e.g., frames per second) with reduced design margins, referred to as better-than-worst-case design. To this end, the first contribution of this work is a complete modeling framework that captures a streaming application mapped to an NoC-based multiprocessor system with voltage-frequency islands under process-induced die-to-die and within-die frequency variations. The framework is used to analyze the impact of variations in the frequency of hardware components on application throughput at the system level. The second contribution of this work is a methodology to use the proposed framework and estimate the impact of reducing circuit design margins on the number of good dies that satisfy the throughput requirement of a real-time streaming application. We show on both synthetic and real applications that the proposed better-than-worst-case design approach can increase the number of good dies by up to 9.6\% and 18.8\% for designs with and without fixed SRAM and IO blocks, respectively.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "83", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhang:2015:CDR, author = "Shiwen Zhang and Qingquan Zhang and Sheng Xiao and Ting Zhu and Yu Gu and Yaping Lin", title = "Cooperative Data Reduction in Wireless Sensor Network", journal = j-TECS, volume = "14", number = "4", pages = "84:1--84:??", month = dec, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2786755", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Dec 8 17:53:22 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In wireless sensor networks, owing to the limited energy of the sensor node, it is very meaningful to propose a dynamic scheduling scheme with data management that reduces energy as soon as possible. However, traditional techniques treat data management as an isolated process on only selected individual nodes. In this article, we propose an aggressive data reduction architecture, which is based on error control within sensor segments and integrates three parallel dynamic control mechanisms. We demonstrate that this architecture not only achieves energy savings but also guarantees the data accuracy specified by the application. Furthermore, based on this architecture, we propose two implementations. The experimental results show that both implementations can raise the energy savings while keeping the error at a predefined and acceptable level. We observed that, compared with the basic implementation, the enhancement implementation achieves a relatively higher data accuracy. Moreover, the enhancement implementation is more suitable for the harsh environmental monitoring applications. Further, when both implementations achieve the same accuracy, the enhancement implementation saves more energy. 
Extensive experiments on realistic historical soil temperature data confirm the efficacy and efficiency of two implementations.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "84", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Scheir:2015:ASC, author = "Marijn Scheir and Josep Balasch and Alfredo Rial and Bart Preneel and Ingrid Verbauwhede", title = "Anonymous Split {E}-Cash---Toward Mobile Anonymous Payments", journal = j-TECS, volume = "14", number = "4", pages = "85:1--85:??", month = dec, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2783439", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Dec 8 17:53:22 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Anonymous E-Cash was first introduced in 1982 as a digital, privacy-preserving alternative to physical cash. A lot of research has since then been devoted to extend and improve its properties, leading to the appearance of multiple schemes. Despite this progress, the practical feasibility of E-Cash systems is still today an open question. Payment tokens are typically portable hardware devices in smart card form, resource constrained due to their size, and therefore not suited to support largely complex protocols such as E-Cash. Migrating to more powerful mobile platforms, for instance, smartphones, seems a natural alternative. However, this implies moving computations from trusted and dedicated execution environments to generic multiapplication platforms, which may result in security vulnerabilities. In this work, we propose a new anonymous E-Cash system to overcome this limitation. 
Motivated by existing payment schemes based on MTM (Mobile Trusted Module) architectures, we consider at design time a model in which user payment tokens are composed of two modules: an untrusted but powerful execution platform (e.g., smartphone) and a trusted but constrained platform (e.g., secure element). We show how the protocol's computational complexity can be relaxed by a secure split of computations: nonsensitive operations are delegated to the powerful platform, while sensitive computations are kept in a secure environment. We provide a full construction of our proposed Anonymous Split E-Cash scheme and show that it fully complies with the main properties of an ideal E-Cash system. Finally, we test its performance by implementing it on an Android smartphone equipped with a Java-Card-compatible secure element.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "85", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Jiang:2015:AEB, author = "Jian-Min Jiang and Huibiao Zhu and Qin Li and Yongxin Zhao and Lin Zhao and Shi Zhang and Ping Gong and Zhong Hong", title = "Analyzing Event-Based Scheduling in Concurrent Reactive Systems", journal = j-TECS, volume = "14", number = "4", pages = "86:1--86:??", month = dec, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2783438", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Dec 8 17:53:22 MST 2015", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The traditional research on scheduling focuses on task scheduling and schedulability analysis in concurrent reactive systems. In this article, we dedicate ourselves to event-based scheduling. We first formally define an event-based scheduling policy and propose the notion of the correctness of a scheduling policy in terms of weak termination. 
Then we investigate the correctness of the decomposition of scheduling controls and finally obtain a decentralized scheduling method. The method can automatically decompose the scheduling policies of a concurrent reactive system into atomic scheduling policies. Every atomic scheduling policy corresponds to one subsystem. Each of the subsystems is a completely independent system, which may be developed and deployed independently. An experiment demonstrates these results that may help engineers to design correct and efficient schedule policies for a concurrent reactive system.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "86", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mandal:2016:DIW, author = "Kalikinkar Mandal and Xinxin Fan and Guang Gong", title = "Design and Implementation of {Warbler} Family of Lightweight Pseudorandom Number Generators for Smart Devices", journal = j-TECS, volume = "15", number = "1", pages = "1:1--1:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2808230", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/prng.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "With the advent of ubiquitous computing and the Internet of Things (IoT), the security and privacy issues for various smart devices such as radio-frequency identification (RFID) tags and wireless sensor nodes are receiving increased attention from academia and industry. A number of lightweight cryptographic primitives have been proposed to provide security services for resource-constrained smart devices. 
As one of the core primitives, a cryptographically secure pseudorandom number generator (PRNG) plays an important role for lightweight embedded applications. The most existing PRNGs proposed for smart devices employ true random number generators as a component, which generally incur significant power consumption and gate count in hardware. In this article, we present Warbler family, a new pseudorandom number generator family based on nonlinear feedback shift registers (NLFSRs) with desirable randomness properties. The design of the Warbler family is based on the combination of modified de Bruijn blocks together with a nonlinear feedback Welch-Gong (WG) sequence generator, which enables us to precisely characterize the randomness properties and to flexibly adjust the security level of the resulting PRNG. Some criteria for selecting parameters of the Warbler family are proposed to offer the maximum level of security. Two instances of the Warbler family are also described, which feature two different security levels and are dedicated to EPC C1 Gen2 RFID tags and wireless sensor nodes, respectively. The security analysis shows that the proposed instances not only can pass the cryptographic statistical tests recommended by the EPC C1 Gen2 standard and NIST but also are resistant to the cryptanalytic attacks such as algebraic attacks, cube attacks, time-memory-data tradeoff attacks, Mihaljevi{\'c} et al.'s attacks, and weak internal state and fault injection attacks. Our ASIC implementations using a 65nm CMOS process demonstrate that the proposed two lightweight instances of the Warbler family can achieve good performance in terms of speed and area and provide ideal solutions for securing low-cost smart devices.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "1", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Poddar:2016:DHP, author = "Soumyajit Poddar and Prasun Ghosal and Hafizur Rahaman", title = "Design of a High-Performance {CDMA}-Based Broadcast-Free Photonic Multi-Core Network on Chip", journal = j-TECS, volume = "15", number = "1", pages = "2:1--2:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2839301", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Present-day focus on multicore research has not only increased computing power but also power- and bandwidth-efficient communication among cores. On-chip communication networks have become popular today because of their low energy use and modular structure compared to bus-based interconnects. Silicon photonics has further boosted the performance of on-chip interconnection networks with its low energy-delay product and high reliability. In current multicore Network-on-Chip (NoC) architectures, photonics is playing an important role in transferring large volumes of data both on- and off-chip. The problem addressed in this work is the issue of broadcast traffic arising due to invalidation requests from on-chip cache memories. Although such traffic is typically less than 1\% of total traffic, it can easily present a high load on network resources, creating congestion and degrading performance. In this article, we propose a CDMA-based, secure, scalable, and energy-efficient technique to eliminate broadcast invalidations and increase overall performance. Experimental results indicate a performance boost up to 22.2\% over a competing Photonic NoC and up to 57.4\% over Electrical Mesh-based NoC when the proposed technique is used. 
Additional hardware deployed has an area overhead of less than 1\%, whereas total energy consumed is at par with other state-of-the-art techniques.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "2", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Santini:2016:BCS, author = "Thiago Santini and Paolo Rech and Gabriel Luca Nazar and Fl{\'a}vio Rech Wagner", title = "Beyond Cross-Section: Spatio-Temporal Reliability Analysis", journal = j-TECS, volume = "15", number = "1", pages = "3:1--3:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2794148", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "A computational system employed in safety-critical applications typically has reliability as a primary concern. Thus, the designer focuses on minimizing the device radiation-sensitive area, often leading to performance degradation. In this article, we present a mathematical model to evaluate system reliability in spatial (i.e., radiation-sensitive area) and temporal (i.e., performance) terms and prove that minimizing radiation-sensitive area does not necessarily maximize application reliability. To support our claim, we present an empirical counterexample where application reliability is improved even if the radiation-sensitive area of the device is increased. 
An extensive radiation test campaign using a 28 nm commercial-off-the-shelf ARM-based SoC was conducted, and experimental results demonstrate that, while executing the considered application at military aircraft altitude, the probability of executing a two-year mission workload without failures is increased by 5.85\% if L1 caches are enabled (thus increasing the radiation-sensitive area) when compared to no cache level being enabled. However, if both L1 and L2 caches are enabled, the probability is decreased by 31.59\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "3", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Gebotys:2016:PCP, author = "Catherine H. Gebotys and Brian A. White and Edgar Mateos", title = "Preaveraging and Carry Propagate Approaches to Side-Channel Analysis of {HMAC-SHA256}", journal = j-TECS, volume = "15", number = "1", pages = "4:1--4:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2794093", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Although HMAC-SHA has been standardized for over a decade, few published attacks on the single-cycle round implementation exist. In this research, new attack techniques are provided, for the first time, (1) to help to discriminate between values of secret intermediate variables within HMAC and (2) to reduce the large word size complexity. Preaveraging and carry propagate techniques are proposed using chosen plaintexts and shown to significantly reduce the complexity and runtimes for side-channel analysis of an Altera FPGA platform. 
This research is important for advancing side channel analysis of complex embedded ASICs and ensuring secure implementations in future embedded ubiquitous devices.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "4", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Dong:2016:DLD, author = "Wei Dong and Luyao Luo and Chao Huang", title = "Dynamic Logging with Dylog in Networked Embedded Systems", journal = j-TECS, volume = "15", number = "1", pages = "5:1--5:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2807698", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Event logging is an important technique for networked embedded systems like wireless sensor networks. It can greatly help developers to understand complex system behaviors and diagnose program bugs. Existing logging facilities do not well satisfy three practical requirements: flexibility, efficiency, and high synchronization accuracy. To simultaneously satisfy these requirements, we present Dylog, a dynamic logging facility for networked embedded systems. Dylog employs several techniques. First, Dylog uses binary instrumentation for dynamically inserting or removing logging statements, enabling flexible and interactive debugging at runtime. Second, Dylog incorporates an efficient storage system and log collection protocol for recording and transferring the logging messages. Third, Dylog employs a lightweight data-driven approach for reconstructing the synchronized time of the logging messages. Dylog uses MAC-layer timestamping and drift compensation to achieve high synchronization accuracy. We implement Dylog on the TinyOS 2.1.1/TelosB platform. Results show the following: (1) Dylog incurs a small overhead. 
Indirections in Dylog incur an additional execution overhead of less than 1\%. Dylog reduces the logging storage size by approximately 50\% compared with the standard TinyOS radio printf library. Dylog reduces the patch size by more than 90\%, compared with incremental reprogramming. (2) Dylog reduces the synchronization overhead by 78\% in terms of transmission cost, compared with a traditional time synchronization protocol, FTSP, and it can achieve a high time synchronization accuracy of 5.4 $ \mu $s. (3) Dylog can help diagnose system problems effectively at the source-code level for three real-world scenarios.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "5", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Jiang:2016:PAD, author = "Ke Jiang and Petru Eles and Zebo Peng", title = "Power-Aware Design Techniques of Secure Multimode Embedded Systems", journal = j-TECS, volume = "15", number = "1", pages = "6:1--6:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2801152", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Nowadays, embedded systems have been widely used in all types of application areas, some of which belong to the safety and reliability critical domains. The functional correctness and design robustness of the embedded systems involved in such domains are crucial for the safety of personal/enterprise property or even human lives. Thereby, a holistic design procedure that considers all the important design concerns is essential. In this article, we approach embedded systems design from an integral perspective. We consider not only the classic real-time and quality of service requirements, but also the emerging security and power efficiency demands.
Modern embedded systems are not any more developed for a fixed purpose, but instead designed for undertaking various processing requests. This leads to the concept of multimode embedded systems, in which the number and nature of active tasks change during runtime. Under dynamic situations, providing high performance along with various design concerns becomes a really difficult problem. Therefore, we propose a novel power-aware secure embedded systems design framework that efficiently solves the problem of runtime quality optimization with security and power constraints. The efficiency of our proposed techniques are evaluated in extensive experiments.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "6", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Bambagini:2016:EAS, author = "Mario Bambagini and Mauro Marinoni and Hakan Aydin and Giorgio Buttazzo", title = "Energy-Aware Scheduling for Real-Time Systems: a Survey", journal = j-TECS, volume = "15", number = "1", pages = "7:1--7:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2808231", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article presents a survey of energy-aware scheduling algorithms proposed for real-time systems. The analysis presents the main results starting from the middle 1990s until today, showing how the proposed solutions evolved to address the evolution of the platform's features and needs. The survey first presents a taxonomy to classify the existing approaches for uniprocessor systems, distinguishing them according to the technology exploited for reducing energy consumption, that is, Dynamic Voltage and Frequency Scaling (DVFS), Dynamic Power Management (DPM), or both. 
Then, the survey discusses the approaches proposed in the literature to deal with the additional problems related to the evolution of computing platforms toward multicore architectures.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "7", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Thomas:2016:EDP, author = "Anna Thomas and Karthik Pattabiraman", title = "Error Detector Placement for Soft Computing Applications", journal = j-TECS, volume = "15", number = "1", pages = "8:1--8:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2801154", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The scaling of Silicon devices has exacerbated the unreliability of modern computer systems, and power constraints have necessitated the involvement of software in hardware error detection. At the same time, emerging workloads in the form of soft computing applications (e.g., multimedia applications) can tolerate most hardware errors as long as the erroneous outputs do not deviate significantly from error-free outcomes. We term outcomes that deviate significantly from the error-free outcomes as Egregious Data Corruptions (EDCs). In this study, we propose a technique to place detectors for selectively detecting EDC-causing errors in an application. We performed an initial study to formulate heuristics that identify EDC-causing data. Based on these heuristics, we developed an algorithm that identifies program locations for placing high coverage detectors for EDCs using static analysis. Our technique achieves an average EDC coverage of 82\%, under performance overheads of 10\%, while detecting 10\% of the Non-EDC and benign faults. 
We also evaluate the error resilience of these applications under the 14 compiler optimizations.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "8", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Corre:2016:TTB, author = "Youenn Corre and Jean-Philippe Diguet and Dominique Heller and Dominique Blouin and Lo{\"\i}c Lagadec", title = "{TBES}: Template-Based Exploration and Synthesis of Heterogeneous Multiprocessor Architectures on {FPGA}", journal = j-TECS, volume = "15", number = "1", pages = "9:1--9:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2816817", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article describes TBES, a software end-to-end environment for synthesizing multitask applications on FPGAs. The implementation follows a template-based approach for creating heterogeneous multiprocessor architectures. Heterogeneity stems from the use of general-purpose processors along with custom accelerators. Experimental results demonstrate substantial speedup for several classes of applications. Furthermore, this work allows for reducing development costs and saving development time for the software architect, the domain expert, and the optimization expert. This work provides a framework to bring together various existing tools and optimisation algorithms. The advantages are manifold: modularity and flexibility, easy customization for best-fit algorithm selection, durability and evolution over time, and legacy preservation including domain experts' know-how. In addition to the use of architecture templates for the overall system, a second contribution lies in using high-level synthesis for promoting exploration of hardware IPs. 
The domain expert, who best knows which tasks are good candidates for hardware implementation, selects parts of the initial application to be potentially synthesized as dedicated accelerators. As a consequence, the HLS general problem turns into a constrained and more tractable issue, and automation capabilities eliminate the need for tedious and error-prone manual processes during domain space exploration. The automation only takes place once the application has been broken down into concurrent tasks by the designer, who can then drive the synthesis process with a set of parameters provided by TBES to balance tradeoffs between optimization efforts and quality of results. The approach is demonstrated step by step up to FPGA implementations and executions with an MJPEG benchmark and a complex Viola-Jones face detection application. We show that TBES allows one to achieve results with up to 10 times speedup to reduce development times and to widen design space exploration.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "9", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chatterjee:2016:TAD, author = "Urbi Chatterjee and Rajat Subhra Chakraborty and Hitesh Kapoor and Debdeep Mukhopadhyay", title = "Theory and Application of Delay Constraints in Arbiter {PUF}", journal = j-TECS, volume = "15", number = "1", pages = "10:1--10:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2815621", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Physically Unclonable Function (PUF) circuits are often vulnerable to mathematical model-building attacks. 
We theoretically quantify the advantage provided to an adversary by any training dataset expansion technique along the lines of security analysis of cryptographic hash functions. We present an algorithm to enumerate certain sets of delay constraints for the widely studied Arbiter PUF (APUF) circuit, then demonstrate how these delay constraints can be utilized to expand the set of known Challenge--Response Pairs (CRPs), thus facilitating model-building attacks. We provide experimental results for Field Programmable Gate Array (FPGA)--based APUF to establish the effectiveness of the proposed attack.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "10", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kwon:2016:CBF, author = "Se Jin Kwon", title = "A Cache-Based Flash Translation Layer for {TLC}-Based Multimedia Storage Devices", journal = j-TECS, volume = "15", number = "1", pages = "11:1--11:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2820614", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Current triple-level cell (TLC)-based solid-state drives used in multimedia storage devices support multichannel access to increase capacity and throughput. Unfortunately, current state-of-the-art FTL algorithms must employ selective caching for inquiring about the address mapping information, which causes low space utilization, a large flash memory requirement, and performance degradation. In this article, the \underline{Ca}che-\underline{b}ased Flash Translation Layer (Cab-FTL) is proposed for TLC-based multimedia storage devices.
Cab-FTL enhances the read and write performances by achieving high space utilization while reducing the size of the mapping tables to 1.68\% compared to DFTL. Despite a caching of the mapping tables in DRAM, Cab-FTL achieves a fast system boot using its fast wake-up mechanism.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "11", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Huang:2016:EPC, author = "Sheng-Min Huang and Li-Pin Chang", title = "Exploiting Page Correlations for Write Buffering in Page-Mapping Multichannel {SSDs}", journal = j-TECS, volume = "15", number = "1", pages = "12:1--12:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2815622", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Advanced solid-state disks (SSDs) have been equipped with page-mapping flash translation layers and multichannel architectures. The SSDs employ a RAM-based write buffer, which delays write requests for reducing write traffic, reorders requests for mitigating garbage-collection overhead, and produces parallel page writes for improving channel time utilization. This work presents a novel write buffer algorithm that exploits temporal and spatial correlations among buffer pages. The write-buffer groups temporally or spatially correlate buffer pages and then write the grouped buffer pages to the same flash block. In this way, when the correlated page data are updated in the future, flash blocks will receive bulk page invalidations and become good candidates for garbage collection. 
With multichannel architectures, the write buffer adaptively disperses read-most sequential data over channels for high page-level parallelism of sequential reads, while clustering write-most sequential data in the same channel for a reduced cost of garbage collection. We evaluated the proposed method and previously proposed buffer algorithms. Our method was shown to outperform the existing methods by up to 134\%. We also implemented our buffer design on the OpenSSD platform; the time and space overheads of our design were reported to be very low.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "12", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chang:2016:SGA, author = "Li-Pin Chang and Yu-Syun Liu and Wen-Huei Lin", title = "Stable Greedy: Adaptive Garbage Collection for Durable Page-Mapping Multichannel {SSDs}", journal = j-TECS, volume = "15", number = "1", pages = "13:1--13:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2820613", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Commodity solid state drives (SSDs) have recently begun involving the adoption of powerful controllers for multichannel flash management at the page level. However, many of these models still use primitive garbage-collection algorithms, because previous approaches are subject to poor scalability with high-capacity flash memory. This study presents Stable Greedy for garbage collection in page-mapping multichannel SSDs. Stable Greedy identifies page-accurate data hotness using block-level information, and jointly considers block space utilization and block stability for victim selection. 
Its design considers flash wear leveling for SSD lifetime enhancement at the block level as well as at the channel level. Stable Greedy runs at a constant time, and requires limited RAM space. The simulation results revealed that Stable Greedy outperformed previous methods considerably under various workloads and multichannel architectures. Stable Greedy was successfully implemented on the OpenSSD platform, and the actual performance measurements were consistent with the simulation results.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "13", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Sun:2016:FFJ, author = "Jinghao Sun and Nan Guan and Yang Wang and Qingxu Deng and Peng Zeng and Wang Yi", title = "Feasibility of Fork-Join Real-Time Task Graph Models: Hardness and Algorithms", journal = j-TECS, volume = "15", number = "1", pages = "14:1--14:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2809780", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In the formal analysis of real-time systems, modeling of branching codes and modeling of intratask parallelism structures are two of the most important research topics. These two real-time properties are combined, resulting in the fork-join real-time task (FJRT) model, which extends the digraph-based task model with forking and joining semantics. We prove that the EDF schedulability problem on a preemptive uniprocessor for the FJRT model is coNP-hard in the strong sense, even if the utilization of the task system is bounded by a constant strictly less than 1. 
Then, we show that the problem becomes tractable with some slight structural restrictions on parallel sections, for which we propose an exact schedulability test with pseudo-polynomial time complexity. Our results thus establish a borderline between the tractable and intractable FJRT models.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "14", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{DiPietro:2016:CLD, author = "Roberto {Di Pietro} and Flavio Lombardi and Antonio Villani", title = "{CUDA} Leaks: a Detailed Hack for {CUDA} and a (Partial) Fix", journal = j-TECS, volume = "15", number = "1", pages = "15:1--15:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2801153", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Graphics processing units (GPUs) are increasingly common on desktops, servers, and embedded platforms. In this article, we report on new security issues related to CUDA, which is the most widespread platform for GPU computing. In particular, details and proofs-of-concept are provided about novel vulnerabilities to which CUDA architectures are subject. We show how such vulnerabilities can be exploited to cause severe information leakage. As a case study, we experimentally show how to exploit one of these vulnerabilities on a GPU implementation of the AES encryption algorithm. Finally, we also suggest software patches and alternative approaches to tackle the presented vulnerabilities.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "15", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhu:2016:SDW, author = "Zhenhuan Zhu and S. Olutunde Oyadiji", title = "Structure Design of Wireless Sensor Nodes with Energy and Cost Awareness for Multichannel Signal Measurement", journal = j-TECS, volume = "15", number = "1", pages = "16:1--16:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2790300", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article aims to develop a design pattern of a wireless sensor node working in multichannel signal measurement for effectively lowering energy consumption and cost. The proposed design pattern enables the architecture of a wireless sensor node to adapt to application requirements, thus to significantly reduce system redundancy. Two multisensor structures are parameterized regarding frequency response, power consumption, and cost. The system design pattern provides flexibility through three proposed interface circuits that bridge between multisensor structures and the microprocessors inside sensor nodes. It also allows adjusting time the delay parameter that can enlarge the selection range of main electronic components, and thereby increases the robustness of the model for practical implementations. A virtual case study is provided to demonstrate how to apply this model into an application design.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "16", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hsiu:2016:UCS, author = "Pi-Cheng Hsiu and Po-Hsien Tseng and Wei-Ming Chen and Chin-Chiang Pan and Tei-Wei Kuo", title = "User-Centric Scheduling and Governing on Mobile Devices with {big.LITTLE} Processors", journal = j-TECS, volume = "15", number = "1", pages = "17:1--17:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2829946", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Mobile applications will become progressively more complicated and diverse. Heterogeneous computing architectures like big.LITTLE are a hardware solution that allows mobile devices to combine computing performance and energy efficiency. However, software solutions that conform to the paradigm of conventional fair scheduling and governing are not applicable to mobile systems, thereby degrading user experience or reducing energy efficiency. In this article, we exploit the concept of application sensitivity, which reflects the user's attention on each application, and devise a user-centric scheduler and governor that allocate computing resources to applications according to their sensitivity. Furthermore, we integrate our design into the Android operating system. The results of experiments conducted on a commercial big.LITTLE smartphone with real-world mobile apps demonstrate that the proposed design can achieve significant gains in energy efficiency while improving the quality of user experience.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "17", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Sharma:2016:DFT, author = "Namita Sharma and Preeti Ranjan Panda and Francky Catthoor and Min Li and Prashant Agrawal", title = "Data Flow Transformation for Energy-Efficient Implementation of {Givens} Rotation-Based {QRD}", journal = j-TECS, volume = "15", number = "1", pages = "18:1--18:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2837025", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "QR decomposition (QRD), a matrix decomposition algorithm widely used in embedded application domain, can be realized in a large number of valid processing sequences that differ significantly in the number of memory accesses and computations, and hence the overall implementation energy. With modern low-power embedded processors evolving toward register files with wide memory interfaces and vector functional units (FUs), data flow in these algorithms needs to be carefully devised to efficiently utilize the costly wide memory accesses and the vector FUs. In this article, we present an energy-efficient data flow transformation strategy for the Givens rotation-based QRD.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "18", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Emeretlis:2016:LBB, author = "Andreas Emeretlis and George Theodoridis and Panayiotis Alefragis and Nikolaos Voros", title = "A {Logic-Based Benders} Decomposition Approach for Mapping Applications on Heterogeneous Multicore Platforms", journal = j-TECS, volume = "15", number = "1", pages = "19:1--19:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2838733", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The development of efficient methods for mapping applications on heterogeneous multicore platforms is a key issue in the field of embedded systems. In this article, a novel approach based on the Logic-Based Benders decomposition principle is introduced for mapping complex applications on these platforms, aiming at optimizing their execution time. To provide optimal solutions for this problem in a short time, a new hybrid model that combines Integer Linear Programming (ILP) and Constraint Programming (CP) models is introduced. Also, to reduce the complexity of the model and its solution time, a set of novel techniques for generating additional constraints called Benders cuts is proposed. An extensive set of experiments has been performed in which synthetic applications described by Directed Acyclic Graphs (DAGs) were mapped to a number of heterogeneous multicore platforms. Moreover, experiments with DAGs that correspond to two real-life applications have also been performed. Based on the experimental results, it is proven that the proposed approach outperforms the pure ILP model in terms of the solution time and quality of the solution. 
Specifically, the proposed approach is able to find an optimal solution within a time limit of 2 hours in the vast majority of performed experiments, while the pure ILP model fails. Also, for the cases where both methods fail to find an optimal solution within the time limit, the solution of the proposed approach is systematically better than the solution of the ILP model.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "19", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ko:2016:SBS, author = "Yohan Ko and Jihoon Kang and Jongwon Lee and Yongjoo Kim and Joonhyun Kim and Hwisoo So and Kyoungwoo Lee and Yunheung Paek", title = "Software-Based Selective Validation Techniques for Robust {CGRAs} Against Soft Errors", journal = j-TECS, volume = "15", number = "1", pages = "20:1--20:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2843943", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Coarse-Grained Reconfigurable Architectures (CGRAs) are drawing significant attention since they promise both performances with parallelism and flexibility with reconfiguration. Soft errors (or transient faults) are becoming a serious design concern in embedded systems including CGRAs since the soft error rate is increasing exponentially as technology is scaling. A recently proposed software-based technique with TMR (Triple Modular Redundancy) implemented on CGRAs incurs extreme overheads in terms of runtime and energy consumption mainly due to expensive voting mechanisms for the outputs from the triplication of every operation. In this article, we propose selective validation mechanisms for efficient modular redundancy techniques in the datapaths on CGRAs. 
Our techniques selectively validate the results at synchronous operations rather than every operation in order to reduce the expensive performance overhead from the validation mechanism. We also present an optimization technique to further improve the runtime and the energy consumption by minimizing synchronous operations where a validating mechanism needs to be applied. Our experimental results demonstrate that our selective validation-based TMR technique with our optimization on CGRAs can improve the runtime by 41.0\% and the energy consumption by 26.2\% on average over benchmarks as compared to the recently proposed software-based TMR technique with the full validation.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "20", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ivanov:2016:ARS, author = "Radoslav Ivanov and Miroslav Pajic and Insup Lee", title = "Attack-Resilient Sensor Fusion for Safety-Critical Cyber-Physical Systems", journal = j-TECS, volume = "15", number = "1", pages = "21:1--21:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2847418", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article focuses on the design of safe and attack-resilient Cyber-Physical Systems (CPS) equipped with multiple sensors measuring the same physical variable. A malicious attacker may be able to disrupt system performance through compromising a subset of these sensors. Consequently, we develop a precise and resilient sensor fusion algorithm that combines the data received from all sensors by taking into account their specified precisions. 
In particular, we note that in the presence of a shared bus, in which messages are broadcast to all nodes in the network, the attacker's impact depends on what sensors he has seen before sending the corrupted measurements. Therefore, we explore the effects of communication schedules on the performance of sensor fusion and provide theoretical and experimental results advocating for the use of the Ascending schedule, which orders sensor transmissions according to their precision starting from the most precise. In addition, to improve the accuracy of the sensor fusion algorithm, we consider the dynamics of the system in order to incorporate past measurements at the current time. Possible ways of mapping sensor measurement history are investigated in the article and are compared in terms of the confidence in the final output of the sensor fusion. We show that the precision of the algorithm using history is never worse than the no-history one, while the benefits may be significant. Furthermore, we utilize the complementary properties of the two methods and show that their combination results in a more precise and resilient algorithm. Finally, we validate our approach in simulation and experiments on a real unmanned ground robot.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "21", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Shukla:2016:ESB, author = "Sandeep K. Shukla", title = "Editorial: Science of the Big and Small and Embedded Computing Systems", journal = j-TECS, volume = "15", number = "2", pages = "21:1--21:??", month = may, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2901293", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. 
Comput. Syst.", articleno = "21e", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Dutt:2016:TSE, author = "Nikil Dutt and Axel Jantsch and Santanu Sarma", title = "Toward Smart Embedded Systems: a Self-aware System-on-Chip {(SoC)} Perspective", journal = j-TECS, volume = "15", number = "2", pages = "22:1--22:??", month = may, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2872936", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Embedded systems must address a multitude of potentially conflicting design constraints such as resiliency, energy, heat, cost, performance, security, etc., all in the face of highly dynamic operational behaviors and environmental conditions. By incorporating elements of intelligence, the hope is that the resulting ``smart'' embedded systems will function correctly and within desired constraints in spite of highly dynamic changes in the applications and the environment, as well as in the underlying software/hardware platforms. Since terms related to ``smartness'' (e.g., self-awareness, self-adaptivity, and autonomy) have been used loosely in many software and hardware computing contexts, we first present a taxonomy of ``self-x'' terms and use this taxonomy to relate major ``smart'' software and hardware computing efforts. A major attribute for smart embedded systems is the notion of self-awareness that enables an embedded system to monitor its own state and behavior, as well as the external environment, so as to adapt intelligently. Toward this end, we use a System-on-Chip perspective to show how the CyberPhysical System-on-Chip (CPSoC) exemplar platform achieves self-awareness through a combination of cross-layer sensing, actuation, self-aware adaptations, and online learning. 
We conclude with some thoughts on open challenges and research directions.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "22", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Vinco:2016:ESI, author = "Sara Vinco and Christian Pilato", title = "Editorial: Special Issue on Innovative Design Methods for Smart Embedded Systems", journal = j-TECS, volume = "15", number = "2", pages = "22:1--22:??", month = may, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2885505", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "22e", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Oneto:2016:LHF, author = "Luca Oneto and Sandro Ridella and Davide Anguita", title = "Learning Hardware-Friendly Classifiers Through Algorithmic Stability", journal = j-TECS, volume = "15", number = "2", pages = "23:1--23:??", month = may, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2836165", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Most state-of-the-art machine-learning (ML) algorithms do not consider the computational constraints of implementing the learned model on embedded devices. These constraints are, for example, the limited depth of the arithmetic unit, the memory availability, or the battery capacity. We propose a new learning framework, the Algorithmic Risk Minimization (ARM), which relies on Algorithmic-Stability, and includes these constraints inside the learning process itself. 
ARM allows one to train advanced resource-sparing ML models and to efficiently deploy them on smart embedded systems. Finally, we show the advantages of our proposal on a smartphone-based Human Activity Recognition application by comparing it to a conventional ML approach.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "23", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Das:2016:AHR, author = "Anup Das and Bashir M. Al-Hashimi and Geoff V. Merrett", title = "Adaptive and Hierarchical Runtime Manager for Energy-Aware Thermal Management of Embedded Systems", journal = j-TECS, volume = "15", number = "2", pages = "24:1--24:??", month = may, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2834120", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Modern embedded systems execute applications, which interact with the operating system and hardware differently depending on the type of workload. These cross-layer interactions result in wide variations of the chip-wide thermal profile. In this article, a reinforcement learning-based runtime manager is proposed that guarantees application-specific performance requirements and controls the POSIX thread allocation and voltage/frequency scaling for energy-efficient thermal management. This controls three thermal aspects: peak temperature, average temperature, and thermal cycling. Contrary to existing learning-based runtime approaches that optimize energy and temperature individually, the proposed runtime manager is the first approach to combine the two objectives, simultaneously addressing all three thermal aspects. However, determining thread allocation and core frequencies to optimize energy and temperature is an NP-hard problem. 
This leads to exponential growth in the learning table (significant memory overhead) and a corresponding increase in the exploration time to learn the most appropriate thread allocation and core frequency for a particular application workload. To confine the learning space and to minimize the learning cost, the proposed runtime manager is implemented in a two-stage hierarchy: a heuristic-based thread allocation at a longer time interval to improve thermal cycling, followed by a learning-based hardware frequency selection at a much finer interval to improve average temperature, peak temperature, and energy consumption. This enables finer control on temperature in an energy-efficient manner while simultaneously addressing scalability, which is a crucial aspect for multi-/many-core embedded systems. The proposed hierarchical runtime manager is implemented for Linux running on nVidia's Tegra SoC, featuring four ARM Cortex-A15 cores. Experiments conducted with a range of embedded and cpu-intensive applications demonstrate that the proposed runtime manager not only reduces energy consumption by an average 15\% with respect to Linux but also improves all the thermal aspects---average temperature by 14${}^\circ $C, peak temperature by 16${}^\circ $C, and thermal cycling by 54\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "24", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Gu:2016:RTF, author = "Xiaoqi Gu and Yongxin Zhu and Shengyan Zhou and Chaojun Wang and Meikang Qiu and Guoxing Wang", title = "A Real-Time {FPGA-Based} Accelerator for {ECG} Analysis and Diagnosis Using Association-Rule Mining", journal = j-TECS, volume = "15", number = "2", pages = "25:1--25:??", month = may, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2821508", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Telemedicine provides health care services at a distance using information and communication technologies, which intends to be a solution to the challenges faced by current health care systems with growing numbers of population, increased demands from patients, and shortages in human resources. Recent advances in telemedicine, especially in wearable electrocardiogram (ECG) monitors, call for more intelligent and efficient automatic ECG analysis and diagnostic systems. We present a streaming architecture implemented on Field-Programmable Gate Arrays (FPGAs) to accelerate real-time ECG signal analysis and diagnosis in a pipelining and parallel way. Association-rule mining is employed to generate early diagnostic results by matching features of ECG with generated association rules. To improve performance of the processing, we propose a hardware-oriented data-mining algorithm named Bit\_Q\_Apriori. The corresponding hardware implementation indicates a good scalability and outperforms other hardware designs in terms of performance, throughput, and hardware cost.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "25", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Johnson:2016:RTR, author = "Taylor T. Johnson and Stanley Bak and Marco Caccamo and Lui Sha", title = "Real-Time Reachability for Verified Simplex Design", journal = j-TECS, volume = "15", number = "2", pages = "26:1--26:??", month = may, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2723871", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The Simplex architecture ensures the safe use of an unverifiable complex/smart controller by using it in conjunction with a verified safety controller and verified supervisory controller (switching logic). This architecture enables the safe use of smart, high-performance, untrusted, and complex control algorithms to enable autonomy without requiring the smart controllers to be formally verified or certified. Simplex incorporates a supervisory controller that will take over control from the unverified complex/smart controller if it misbehaves and use a safety controller. The supervisory controller should (1) guarantee that the system never enters an unsafe state (safety), but should also (2) use the complex/smart controller as much as possible (minimize conservatism). The problem of precisely and correctly defining the switching logic of the supervisory controller has previously been considered either using a control-theoretic optimization approach or through an offline hybrid-systems reachability computation. In this work, we show that a combined online/offline approach that uses aspects of the two earlier methods, along with a real-time reachability computation, also maintains safety, but with significantly less conservatism, allowing the complex controller to be used more frequently. 
We demonstrate the advantages of this unified approach on a saturated inverted pendulum system, in which the verifiable region of attraction is over twice as large compared to the earlier approach. Additionally, to validate the claims that the real-time reachability approach may be implemented on embedded platforms, we have ported and conducted embedded hardware studies using both ARM processors and Atmel AVR microcontrollers. This is the first ever demonstration of a hybrid-systems reachability computation in real time on actual embedded platforms, which required addressing significant technical challenges.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "26", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Baka:2016:NSS, author = "Maria-Iro Baka and Francky Catthoor and Dimitrios Soudris", title = "Near-Static Shading Exploration for Smart Photovoltaic Module Topologies Based on Snake-like Configurations", journal = j-TECS, volume = "15", number = "2", pages = "27:1--27:??", month = may, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2837026", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Variable shading due to clouds and nearby objects has a severe impact on the energy yield of photovoltaic installations. Due to the industry's standard of permanently series-connected cells in a photovoltaic (PV) module, partial shading creates mismatches between the Current-Voltage (I-V) characteristics of cells. This article proposes an alternative configurable intramodule cell interconnection topology whereby cell connections can be adapted during operation to allow an optimized power production. 
The proposed configurable topology outperforms significantly a conventional 10 $ \times $ 6 module under heavy shade. Moreover, this is achieved in a quite flexible way and with negligible overhead under uniform irradiation conditions.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "27", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Malek:2016:RRQ, author = "Alirad Malek and Ioannis Sourdis and Stavros Tzilis and Yifan He and Gerard Rauwerda", title = "{RQNoC}: a Resilient Quality-of-Service Network-on-Chip with Service Redirection", journal = j-TECS, volume = "15", number = "2", pages = "28:1--28:??", month = may, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2846097", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this article, we describe RQNoC, a service-oriented Network-on-Chip (NoC) resilient to permanent faults. We characterize the network resources based on the particular service that they support and, when faulty, bypass them, allowing the respective traffic class to be redirected. We propose two alternatives for service redirection, each having different advantages and disadvantages. The first one, Service Detour, uses longer alternative paths through resources of the same service to bypass faulty network parts, keeping traffic classes isolated. The second approach, Service Merge, uses resources of other services providing shorter paths but allowing traffic classes to interfere with each other. The remaining network resources that are common for all services employ additional mechanisms for tolerating faults. 
Links tolerate faults using additional spare wires combined with a flit-shifting mechanism, and the router control is protected with Triple-Modular-Redundancy (TMR). The proposed RQNoC network designs are implemented in 65nm technology and evaluated in terms of performance, area, power consumption, and fault tolerance. Service Detour requires 9\% more area and consumes 7.3\% more power compared to a baseline network, not tolerant to faults. Its packet latency and throughput are close to the fault-free performance at low-fault densities, but fault tolerance and performance drop substantially for 8 or more network faults. Service Merge requires 22\% more area and 27\% more power than the baseline and has a 9\% slower clock. Compared to a fault-free network, a Service Merge RQNoC with up to 32 faults has increased packet latency up to 1.5 to 2.4$ \times $ and reduced throughput to 70\% or 50\%. However, it delivers substantially better fault tolerance, having a mean network connectivity above 90\% even with 32 network faults versus 41\% of a Service Detour network. Combining Service Merge and Service Detour improves fault tolerance, further sustaining a higher number of network faults and reduced packet latency.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "28", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ienne:2016:GES, author = "Paolo Ienne and Jean-Pierre Talpin", title = "Guest Editorial: Special Issue on Models and Methodologies for System Design", journal = j-TECS, volume = "15", number = "2", pages = "29:1--29:??", month = may, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2885503", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans.
Embed. Comput. Syst.", articleno = "29", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Desnos:2016:MRB, author = "Karol Desnos and Maxime Pelcat and Jean-Fran{\c{c}}ois Nezan and Slaheddine Aridhi", title = "On Memory Reuse Between Inputs and Outputs of Dataflow Actors", journal = j-TECS, volume = "15", number = "2", pages = "30:1--30:??", month = may, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2871744", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article introduces a new technique to minimize the memory footprints of Digital Signal Processing (DSP) applications specified with Synchronous Dataflow (SDF) graphs and implemented on shared-memory Multiprocessor System-on-Chip (MPSoCs). In addition to the SDF specification, which captures data dependencies between coarse-grained tasks called actors, the proposed technique relies on two optional inputs abstracting the internal data dependencies of actors: annotations of the ports of actors, and script-based specifications of merging opportunities between input and output buffers of actors. Experimental results on a set of applications show a reduction of the memory footprint by 48\% compared to state-of-the-art minimization techniques.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "30", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Nouri:2016:ARA, author = "Ayoub Nouri and Marius Bozga and Anca Molnos and Axel Legay and Saddek Bensalem", title = "{ASTROLABE}: a Rigorous Approach for System-Level Performance Modeling and Analysis", journal = j-TECS, volume = "15", number = "2", pages = "31:1--31:??", month = may, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2885498", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Building abstract system-level models that faithfully capture performance and functional behavior for embedded systems design is challenging. Unlike functional aspects, performance details are rarely available during the early design phases, and no clear method is known to characterize them. Moreover, once such models are built, they are inherently complex as they mix software models, hardware constraints, and environment abstractions. Their analysis by using traditional performance evaluation methods is reaching the limit. In this article, we present a systematic approach for building stochastic abstract performance models using statistical inference and model calibration, and we propose statistical model checking as a scalable performance evaluation technique for them.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "31", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Butt:2016:DPH, author = "Shahzad Ahmad Butt and Mehdi Roozmeh and Luciano Lavagno", title = "Designing Parameterizable Hardware {IPs} in a Model-Based Design Environment for High-Level Synthesis", journal = j-TECS, volume = "15", number = "2", pages = "32:1--32:??", month = may, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2871737", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Model-based hardware design allows one to map a single model to multiple hardware and/or software architectures, essentially eliminating one of the major limitations of manual coding in C or RTL. Model-based design for hardware implementation has traditionally offered a limited set of microarchitectures, which are typically suitable only for some application scenarios. In this article we illustrate how digital signal processing (DSP) algorithms can be modeled as flexible intellectual property blocks to be used within the popular Simulink model-based design environment. These blocks are written in C and are designed for both functional simulation and hardware implementation, including architectural design space exploration and hardware implementation through high-level synthesis. A key advantage of our modeling approach is that the very same bit-accurate model is used for simulation and high-level synthesis. To prove the feasibility of our proposed approach, we modeled a fast Fourier transform (FFT) algorithm and synthesized it for different DSP applications with very different performance and cost requirements. 
We also implemented a high-level-synthesis (HLS) intellectual property (IP) generator that can generate flexible FFT HLS-IP blocks that can be mapped to multiple micro-/macroarchitectures, to enable design space exploration as well as being used for functional simulation in the Simulink environment.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "32", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Le:2016:CBR, author = "Thi Thieu Hoa Le and Roberto Passerone and Uli Fahrenberg and Axel Legay", title = "Contract-Based Requirement Modularization via Synthesis of Correct Decompositions", journal = j-TECS, volume = "15", number = "2", pages = "33:1--33:??", month = may, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2885752", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In distributed development of modern systems, contracts play a vital role in ensuring interoperability of components and adherence to specifications. It is therefore often desirable to verify the satisfaction of an overall property represented as a contract, given the satisfaction of smaller properties also represented as contracts. When the verification result is negative, designers must face the issue of refining the subproperties and components. This is an instance of the classical synthesis problems: ``can we construct a model that satisfies some given specification?'' In this work, we propose two strategies enabling designers to synthesize or refine a set of contracts so that their composition satisfies a given contract. 
We develop a generic algebraic method and show how it can be applied in different contract models to support top-down component-based development of distributed systems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "33", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Rho:2016:GEC, author = "Seungmin Rho and Wenny Rahayu and Geyong Min", title = "Guest Editorial: Challenges of Embedded Systems as They Evolve into {M2M}, {Internet of Things}", journal = j-TECS, volume = "15", number = "2", pages = "34:1--34:??", month = may, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2886417", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "34", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zeng:2016:SLM, author = "Jing Zeng and Laurence T. Yang and Jianhua Ma", title = "A System-Level Modeling and Design for Cyber-Physical-Social Systems", journal = j-TECS, volume = "15", number = "2", pages = "35:1--35:??", month = may, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2834119", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The design of cyber-physical-social systems (CPSS) is a novel and challenging research field due that it emphasizes the deep fusion of cyberspace, physical space, and social space. In this article, we extend our previously proposed system-level design framework [Zeng et al. 2015] to tailor it to the needs of social scenario of multiple users. 
A hierarchical Petri net-based model and social flow are presented to extend the control flow and formally describe the social interactions of multiple users, respectively. By using the extended model, the system-level optimization for CPSS can be achieved by the improved design flow. Specifically, object emplacement and user satisfaction are further extended into the social environment. Also maximal power estimation algorithm is improved, leveraging the extended intermediate representation model. Finally, we use a smart office case to demonstrate the feasibility and effectiveness of our improved design approach for multiple users.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "35", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhang:2016:IRW, author = "Daqiang Zhang and Jiafu Wan and Zongjian He and Shengjie Zhao and Ke Fan and Sang Oh Park and Zhibin Jiang", title = "Identifying Region-Wide Functions Using Urban Taxicab Trajectories", journal = j-TECS, volume = "15", number = "2", pages = "36:1--36:??", month = may, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2821507", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "With the urban development and enlargement, various regions such as residential zones and administrative districts now appear as parts of cities. People exhibit different mobility patterns in each region, which is closely relevant to region-wide functions. In this article, we propose a scheme to discover region-wide functions using large-scale Shanghai taxicab trajectories that capture enormous traces for more than 13,000 taxicabs over a period of about 3 years. We investigate these taxicab trajectories and conduct an extensive preliminary study. 
Then, we divide the city into disjointed regions using Voronoi decomposition. By incorporating people's pick-up and drop-off information, we refine the Voronoi partitioning results to identify region-wide functional areas. Finally, we study people's movement frequency on weekdays and weekends for every kind of urban functional regions. We also look into human mobility within or across the identified urban functional regions. Experimental results show that human movement is bounded with the function of urban regions, and more than 90\% of people visit neighboring (less than 20km travel distance) functional regions with high probability.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "36", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ji:2016:CLO, author = "Wen Ji and Bo-Wei Chen and Xiangdong Wang and Haiyong Luo and Mucheol Kim and Yiqiang Chen", title = "Cross-Layer Opportunistic Scheduling for Device-to-Device Video Multicast Services", journal = j-TECS, volume = "15", number = "2", pages = "37:1--37:??", month = may, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2856034", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this article, we address the problem of how to make the wireless device-to-device (D2D) video multicast systems have better quality provision with consideration of internet-of-things (IoT) applications. We propose an opportunistic transmission and fair resource allocation framework, including joint application-layer and physical-layer transmission and optimization. 
First, we use a parallel subchannels structure by concatenating the Fountain codes and diversity-embedded space-time block codes to provide reliable and flexible transmission in heterogeneous circumstances. Second, we exploit the quality of heterogeneous user experience (quality of experience) metric under D2D video multicast systems, with consideration of various channel states, device capability, video content urgency, and the number of demanding users. Third, we formulate reliable multiple video streams broadcasting to heterogeneous devices as an aggregate maximum utility achieving problem, and we use opportunistic scheduling to select suitable users in each transmission interval to improve the broadcasting utility. Fourth, we use the utility fair scheme to guide rate allocation among multicontent video multicast. Extensive performance comparison and analysis are presented to demonstrate efficiency of the proposed solution.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "37", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Liu:2016:SMA, author = "Lu Liu and Nick Antonopoulos and Minghui Zheng and Yongzhao Zhan and Zhijun Ding", title = "A Socioecological Model for Advanced Service Discovery in Machine-to-Machine Communication Networks", journal = j-TECS, volume = "15", number = "2", pages = "38:1--38:??", month = may, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2811264", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The new development of embedded systems has the potential to revolutionize our lives and will have a significant impact on future Internet of Thing (IoT) systems if required services can be automatically discovered and accessed at runtime in Machine-to-Machine (M2M) communication networks. It is a crucial task for devices to perform timely service discovery in a dynamic environment of IoTs. In this article, we propose a Socioecological Service Discovery (SESD) model for advanced service discovery in M2M communication networks. In the SESD network, each device can perform advanced service search to dynamically resolve complex enquires and autonomously support and co-operate with each other to quickly discover and self-configure any services available in M2M communication networks to deliver a real-time capability. The proposed model has been systematically evaluated and simulated in a dynamic M2M environment. 
The experiment results show that SESD can self-adapt and self-organize themselves in real time to generate higher flexibility and adaptability and achieve a better performance than the existing methods in terms of the number of discovered service and a better efficiency in terms of the number of discovered services per message.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "38", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ahmad:2016:EMB, author = "Awais Ahmad and Anand Paul and Mazhar Rathore and Hangbae Chang", title = "An Efficient Multidimensional Big Data Fusion Approach in Machine-to-Machine Communication", journal = j-TECS, volume = "15", number = "2", pages = "39:1--39:??", month = may, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2834118", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Machine-to-Machine communication (M2M) is nowadays increasingly becoming a world-wide network of interconnected devices uniquely addressable, via standard communication protocols. The prevalence of M2M is bound to generate a massive volume of heterogeneous, multisource, dynamic, and sparse data, which leads a system towards major computational challenges, such as, analysis, aggregation, and storage. Moreover, a critical problem arises to extract the useful information in an efficient manner from the massive volume of data. Hence, to govern an adequate quality of the analysis, diverse and capacious data needs to be aggregated and fused. Therefore, it is imperative to enhance the computational efficiency for fusing and analyzing the massive volume of data. 
Therefore, to address these issues, this article proposes an efficient, multidimensional, big data analytical architecture based on the fusion model. The basic concept implicates the division of magnitudes (attributes), i.e., big datasets with complex magnitudes can be altered into smaller data subsets using five levels of the fusion model that can be easily processed by the Hadoop Processing Server, resulting in formalizing the problem of feature extraction applications using earth observatory system, social networking, or networking applications. Moreover, a four-layered network architecture is also proposed that fulfills the basic requirements of the analytical architecture. The feasibility and efficiency of the proposed algorithms used in the fusion model are implemented on Hadoop single-node setup on UBUNTU 14.04 LTS core i5 machine with 3.2GHz processor and 4GB memory. The results show that the proposed system architecture efficiently extracts various features (such as land and sea) from the massive volume of satellite data.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "39", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kim:2016:UMA, author = "Eui-Jik Kim and Jung-Hyok Kwon and Ken Choi and Taeshik Shon", title = "Unified Medium Access Control Architecture for Resource-Constrained Machine-to-Machine Devices", journal = j-TECS, volume = "15", number = "2", pages = "40:1--40:??", month = may, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2876958", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In capillary machine-to-machine (M2M) communications, which is being considered as a feasible network solution for M2M applications, because of physical resource constraints and deployment conditions, an energy-efficient and scalable medium access control (MAC) protocol is crucial for numerous M2M devices to concurrently access wireless channels. Therefore, this paper presents a unified MAC layer architecture for resource-constrained M2M devices in capillary M2M networks [named as resource-constrained MAC architecture (RCMA)], which has a unified (monolithic) framework consisting of essential functional components to support MAC-related operations of M2M devices: multi-channel hybrid MAC (McHM), logical link control (LLC), time synchronizer (TS), and device on--off scheduler (DO2S). McHM provides a baseline MAC protocol for an entire capillary M2M system that combines the benefit of both contention-based carrier sense multiple access and schedule-based time division multiple access schemes, whereas the other three components help in the McHM operations. To demonstrate the effectiveness of the RCMA, we implement the whole stack using the QualNet simulator. 
Experimental results show that the RCMA outperforms the conventional ZigBee stack in terms of energy efficiency and scalability, even under heavy traffic conditions.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "40", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Franchino:2016:BOE, author = "Gianluca Franchino and Giorgio Buttazzo and Mauro Marinoni", title = "Bandwidth Optimization and Energy Management in Real-Time Wireless Networks", journal = j-TECS, volume = "15", number = "3", pages = "41:1--41:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2851498", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jul 21 17:18:13 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In embedded systems operated by battery and interacting with the environment, a fundamental issue is the enforcement of real-time and energy constraints to guarantee a desired lifetime with a given performance. A lot of research has focused on energy management at the communication level; however, not many authors considered both real-time and energy requirements in wireless communication systems. This article proposes El-SMan, a power-aware framework working in combination with MAC layer communication protocols for maximizing battery lifetime in wireless networks of embedded systems with real-time constraints. Exploiting the flexibility in bandwidth requirements, El-SMan adapts stream parameters to balance performance versus energy consumption, taking both lifetime and message deadlines into account.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "41", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Shukla:2016:EFI, author = "Sandeep K. 
Shukla", title = "Editorial: Fence Itself Grazing the Field --- Security from the Sentries", journal = j-TECS, volume = "15", number = "3", pages = "41e:1--41e:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2953045", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jul 21 17:18:13 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "41e", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wang:2016:UUP, author = "Yichuan Wang and Xin Liu and Cheng-Hsin Hsu", title = "{UPDATE}: {User-Profile-Driven Adaptive TransfEr} for Mobile Devices", journal = j-TECS, volume = "15", number = "3", pages = "42:1--42:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2889489", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jul 21 17:18:13 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Existing channel-aware scheduling work has mainly focused on scheduling in small timescales, that is, tens to hundreds of seconds. We propose to use long-term user profiles to provide useful statistical information on future network conditions in large timescales. We design scheduling algorithms based on Markov decision theory. We collect and use a large set of real-life traces from the general public. Extensive trace-driven evaluations show that many real mobile users can benefit from our framework. In addition, we compare our framework against state-of-the-art algorithms and observe significant performance differences because the existing algorithms were not designed for the large timescale scenario.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "42", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Sotiriou-Xanthopoulos:2016:IEV, author = "Efstathios Sotiriou-Xanthopoulos and Sotirios Xydis and Kostas Siozios and George Economakos and Dimitrios Soudris", title = "An Integrated Exploration and Virtual Platform Framework for Many-Accelerator Heterogeneous Systems", journal = j-TECS, volume = "15", number = "3", pages = "43:1--43:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2866578", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jul 21 17:18:13 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The recent advent of many-accelerator systems-on-chip (SoC), driven by the need for maximizing throughput and power efficiency, has led to an exponential increase in the hardware/software co-design complexity. The reason of this increase is that the designer has to explore a vast number of architectural parameter combinations for each single accelerator, as well as inter-accelerator configuration combinations under specific area, throughput, and power constraints, given that each accelerator has different computational requirements. In such a case, the design space size explodes. Thus, existing design space exploration (DSE) techniques give poor-quality solutions, as the design space cannot be adequately covered in a fair time. This problem is aggravated by the very long simulation time of the many-accelerator virtual platforms (VPs). This article addresses these design issues by (a) presenting a virtual prototyping solution that decreases the exploration time by enabling the evaluation of multiple configurations per VP simulation and (b) proposing a DSE methodology that efficiently explores the design space of many-accelerator systems. 
With the use of two fully developed use cases, namely an H.264 decoding server for multiple video streams and a parallelized denoising system for MRI scans, we show that the proposed DSE methodology either leads to Pareto points that dominate over those of a typical DSE scenario or finds new solutions that might not be found by the typical DSE. In addition, the proposed virtual prototyping solution leads to DSE runtime reduction reaching 10 $ \times $ for H.264 and 5 $ \times $ for Rician denoise.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "43", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Beretta:2016:PCA, author = "Ivan Beretta and Vincenzo Rana and Abdulkadir Akin and Alessandro Antonio Nacci and Donatella Sciuto and David Atienza", title = "Parallelizing the {Chambolle} Algorithm for Performance-Optimized Mapping on {FPGA} Devices", journal = j-TECS, volume = "15", number = "3", pages = "44:1--44:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2851497", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jul 21 17:18:13 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The performance and the efficiency of recent computing platforms have been deeply influenced by the widespread adoption of hardware accelerators, such as graphics processing units (GPUs) or field-programmable gate arrays (FPGAs), which are often employed to support the tasks of general-purpose processors (GPPs). One of the main advantages of these accelerators over their sequential counterparts (GPPs) is their ability to perform massive parallel computation. However, to exploit this competitive edge, it is necessary to extract the parallelism from the target algorithm to be executed, which generally is a very challenging task. 
This concept is demonstrated, for instance, by the poor performance achieved on relevant multimedia algorithms, such as Chambolle, which is a well-known algorithm employed for the optical flow estimation. The implementations of this algorithm that can be found in the state of the art are generally based on GPUs but barely improve the performance that can be obtained with a powerful GPP. In this article, we propose a novel approach to extract the parallelism from computation-intensive multimedia algorithms, which includes an analysis of their dependency schema and an assessment of their data reuse. We then perform a thorough analysis of the Chambolle algorithm, providing a formal proof of its inner data dependencies and locality properties. Then, we exploit the considerations drawn from this analysis by proposing an architectural template that takes advantage of the fine-grained parallelism of FPGA devices. Moreover, since the proposed template can be instantiated with different parameters, we also propose a design metric, the expansion rate, to help the designer in the estimation of the efficiency and performance of the different instances, making it possible to select the right one before the implementation phase. We finally show, by means of experimental results, how the proposed analysis and parallelization approach leads to the design of efficient and high-performance FPGA-based implementations that are orders of magnitude faster than the state-of-the-art ones.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "44", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Nagar:2016:FPW, author = "Kartik Nagar and Y. N. 
Srikant", title = "Fast and Precise Worst-Case Interference Placement for Shared Cache Analysis", journal = j-TECS, volume = "15", number = "3", pages = "45:1--45:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2854151", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jul 21 17:18:13 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Real-time systems require a safe and precise estimate of the worst-case execution time (WCET) of programs. In multicore architectures, the precision of a program's WCET estimate highly depends on the precision of its predicted shared cache behavior. Prediction of shared cache behavior is difficult due to the uncertain timing of interfering shared cache accesses made by programs running on other cores. Given the assignment of programs to cores, the worst-case interference placement (WCIP) technique tries to find the worst-case timing of interfering accesses, which would cause the maximum number of cache misses on the worst case path of the program, to determine its WCET. Although WCIP generates highly precise WCET estimates, the current ILP-based approach is also known to have very high analysis time. In this work, we investigate the WCIP problem in detail and determine its source of hardness. We show that performing WCIP is an NP-hard problem by reducing the 0-1 knapsack problem. We use this observation to make simplifying assumptions, which make the WCIP problem tractable, and we propose an approximate greedy technique for WCIP, whose time complexity is linear in the size of the program. We perform extensive experiments to show that the assumptions do not affect the precision of WCIP but result in significant reduction of analysis time.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "45", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Starke:2016:EDV, author = "Renan Augusto Starke and Andreu Carminati and R{\^o}mulo {Silva De Oliveira}", title = "Evaluating the Design of a {VLIW} Processor for Real-Time Systems", journal = j-TECS, volume = "15", number = "3", pages = "46:1--46:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2889490", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jul 21 17:18:13 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Nowadays, many real-time applications are very complex and as the complexity and the requirements of those systems become more demanding, more hardware processing capacity is necessary. Unfortunately, the correct functioning of real-time systems depends not only on the logically correct response but also on the time when it is produced. General-purpose processor design fails to deliver analyzability due to their nondeterministic behavior caused by the use of cache memories, dynamic branch prediction, speculative execution, and out-of-order pipelines. In this article, we investigate the pipeline performance of Very Long Instruction Word (VLIW) architectures for real-time systems with an in-order pipeline considering Worst-Case Execution Time (WCET) performance. Techniques on obtaining the WCET of VLIW machines are also considered and we make a quantification on how important are hardware techniques such as static branch prediction, predication, and pipeline speed of complex operations such as memory access and multiplication for high-performance real-time systems. The memory hierarchy is out of the scope of this article and we used a classic deterministic structure formed by a direct mapped instruction cache and a data scratchpad memory. 
A VLIW prototype was implemented in VHDL from scratch considering the HP VLIW ST231 ISA. We also show some compiler insights and we use a representative subset of the M{\"a}lardalen's WCET benchmarks for validation and performance quantification.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "46", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kim:2016:SMR, author = "Sang-Hoon Kim and Jinkyu Jeong and Jin-Soo Kim and Seungryoul Maeng", title = "{SmartLMK}: a Memory Reclamation Scheme for Improving User-Perceived App Launch Time", journal = j-TECS, volume = "15", number = "3", pages = "47:1--47:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2894755", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jul 21 17:18:13 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "As the mobile computing environment evolves, users demand high-quality apps and better user experience. Consequently, memory demand in mobile devices has soared. Device manufacturers have fulfilled the demand by equipping devices with more RAM. However, such a hardware approach is only a temporary solution and does not scale well in the resource-constrained mobile environment. Meanwhile, mobile systems adopt a new app life cycle and a memory reclamation scheme tailored for the life cycle. When a user leaves an app, the app is not terminated but cached in memory as long as there is enough free memory. If the free memory gets low, a victim app is terminated and the associated memory to the app is reclaimed. This process-level approach has worked well in the mobile environment. However, user experience can be impaired severely because the victim selection policy does not consider the user experience. 
In this article, we propose a novel memory reclamation scheme called SmartLMK. SmartLMK minimizes the impact of the process-level reclamation on user experience. The worthiness to keep an app in memory is modeled by means of user-perceived app launch time and app usage statistics. The memory footprint and impending memory demand are estimated from the history of the memory usage. Using these values and memory models, SmartLMK picks up the least valuable apps and terminates them at once. Our evaluation on a real Android-based smartphone shows that SmartLMK efficiently distinguishes the valuable apps among cached apps and keeps those valuable apps in memory. As a result, the user-perceived app launch time can be improved by up to 13.2\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "47", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kim:2016:APA, author = "Dongwon Kim and Yohan Chon and Wonwoo Jung and Yungeun Kim and Hojung Cha", title = "Accurate Prediction of Available Battery Time for Mobile Applications", journal = j-TECS, volume = "15", number = "3", pages = "48:1--48:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2875423", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jul 21 17:18:13 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Energy consumption in mobile devices is an important issue for both system developers and users. Users are aware of the battery-related information of their mobile devices and tend to take appropriate actions to increase the battery life. In this article, we propose a framework that accurately estimates the remaining battery time of applications at runtime. 
The framework profiles the power behavior of applications tied with activated hardware components and estimates the remaining battery budget utilizing the battery-related data provided by the device. The experiments validate that our method predicts the remaining battery time for applications with approximately 93\% of accuracy.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "48", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ahmed:2016:NSC, author = "Rehan Ahmed and Parameswaran Ramanathan and Kewal K. Saluja", title = "Necessary and Sufficient Conditions for Thermal Schedulability of Periodic Real-Time Tasks Under Fluid Scheduling Model", journal = j-TECS, volume = "15", number = "3", pages = "49:1--49:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2883612", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jul 21 17:18:13 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "With the growing need to address the thermal issues in modern processing platforms, various performance throttling schemes have been proposed in literature (DVFS, clock gating, and so on) to manage temperature. In real-time systems, such methods are often unacceptable, as they can result in potentially catastrophic deadline misses. As a result, real-time scheduling research has recently focused on developing algorithms that meet the compute deadline while satisfying power and thermal constraints. Basic bounds that can determine if a set of tasks can be scheduled or not were established in the 1970s based on computation utilization. Similar results for thermal bounds have not been forthcoming. 
In this article, we address the problem of thermal constraint schedulability of tasks and derive necessary and sufficient conditions for thermal feasibility of periodic tasksets on a unicore system. We prove that a GPS-inspired fluid scheduling scheme is thermally optimal when context switch/preemption overhead is ignored. Extension of sufficient conditions to a nonfluid model is still an open problem. We also extend some of the results to a multicore processing environment. We demonstrate the efficacy of our results through extensive simulations. We also evaluate the proposed concepts on a hardware testbed.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "49", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Li:2016:USS, author = "Fang Li and Jiafu Wan and Ping Zhang and Di Li and Daqiang Zhang and Keliang Zhou", title = "Usage-Specific Semantic Integration for Cyber-Physical Robot Systems", journal = j-TECS, volume = "15", number = "3", pages = "50:1--50:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2873057", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jul 21 17:18:13 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The multidisciplinary nature and time criticality of computing in Cyber-Physical Robot Systems (CPRS) makes it significantly different from traditional computer systems. This article attempts to create a usage-specific language called Cyber-Physical Robot Language (CPRL), which supports the CPRS design and implementation in an integrative and swift way. Multiview description and integration strategies as well as formal execution semantics for usage-specific simulation and verification are outlined. A graphic unified environment for CPRS modeling is supplied, in which several tools are integrated. 
A 6-DOF distributed robot system development in the environment is presented. The approach is an attempt to support CPRS design in an effective way, at the same time guaranteeing the system function and performance requirements.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "50", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{An:2016:MBD, author = "Xin An and Eric Rutten and Jean-Philippe Diguet and Abdoulaye Gamati{\'e}", title = "Model-Based Design of Correct Controllers for Dynamically Reconfigurable Architectures", journal = j-TECS, volume = "15", number = "3", pages = "51:1--51:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2873056", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jul 21 17:18:13 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Dynamically reconfigurable hardware has been identified as a promising solution for the design of energy-efficient embedded systems. However, its adoption is limited by costly design effort, including verification and validation, which is even more complex than for nondynamically reconfigurable systems. In this article, we propose a tool-supported formal method to automatically design a correct-by-construction control of the reconfiguration. By representing system behaviors with automata, we exploit automated algorithms to synthesize controllers that safely enforce reconfiguration strategies formulated as properties to be satisfied by control. We design generic modeling patterns for a class of reconfigurable architectures, taking into account both hardware architecture and applications, as well as relevant control objectives. We validate our approach on two case studies implemented on FPGAs.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "51", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hilal:2016:CEA, author = "Allaa R. Hilal and Otman Basir", title = "A Collaborative Energy-Aware Sensor Management System Using Team Theory", journal = j-TECS, volume = "15", number = "3", pages = "52:1--52:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2910574", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jul 21 17:18:13 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "With limited battery supply, power is a scarce commodity in wireless sensor networks. Thus, to prolong the lifetime of the network, it is imperative that the sensor resources are managed effectively. This task is particularly challenging in heterogeneous sensor networks for which decisions and compromises regarding sensing strategies are to be made under time and resource constraints. In such networks, a sensor has to reason about its current state to take actions that are deemed appropriate with respect to its mission, its energy reserve, and the survivability of the overall network. Sensor Management controls and coordinates the use of the sensory suites in a manner that maximizes the success rate of the system in achieving its missions. This article focuses on formulating and developing an autonomous energy-aware sensor management system that strives to achieve network objectives while maximizing its lifetime. A team-theoretic formulation based on the Belief-Desire-Intention (BDI) model and the Joint Intention theory is proposed as a mechanism for effective and energy-aware collaborative decision-making. The proposed system models the collective behavior of the sensor nodes using the Joint Intention theory to enhance sensors' collaboration and success rate. 
Moreover, the BDI modeling of the sensor operation and reasoning allows a sensor node to adapt to the environment dynamics, situation-criticality level, and availability of its own resources. The simulation scenario selected in this work is the surveillance of the Waterloo International Airport. Various experiments are conducted to investigate the effect of varying the network size, number of threats, threat agility, environment dynamism, as well as tracking quality and energy consumption, on the performance of the proposed system. The experimental results demonstrate the merits of the proposed approach compared to the state-of-the-art centralized approach adapted from Atia et al. [2011] and the localized approach in Hilal and Basir [2015] in terms of energy consumption, adaptability, and network lifetime. The results show that the proposed approach has 12 $ \times $ less energy consumption than that of the popular centralized approach.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "52", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ungerer:2016:PIH, author = "Theo Ungerer and Christian Bradatsch and Martin Frieb and Florian Kluge and J{\"o}rg Mische and Alexander Stegmeier and Ralf Jahr and Mike Gerdes and Pavel Zaykov and Lucie Matusova and Zai Jian Jia Li and Zlatko Petrov and Bert B{\"o}ddeker and Sebastian Kehr and Hans Regler and Andreas Hugl and Christine Rochange and Haluk Ozaktas and Hugues Cass{\'e} and Armelle Bonenfant and Pascal Sainrat and Nick Lay and David George and Ian Broster and Eduardo Qui{\~n}ones and Milos Panic and Jaume Abella and Carles Hernandez and Francisco Cazorla and Sascha Uhrig and Mathias Rohde and Arthur Pyka", title = "Parallelizing Industrial Hard Real-Time Applications for the {parMERASA} Multicore", journal = j-TECS, volume = "15", number = "3", pages = "53:1--53:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2910589", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jul 21 17:18:13 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The EC project parMERASA (Multicore Execution of Parallelized Hard Real-Time Applications Supporting Analyzability) investigated timing-analyzable parallel hard real-time applications running on a predictable multicore processor. A pattern-supported parallelization approach was developed to ease sequential to parallel program transformation based on parallel design patterns that are timing analyzable. The parallelization approach was applied to parallelize the following industrial hard real-time programs: 3D path planning and stereo navigation algorithms (Honeywell International s.r.o.), control algorithm for a dynamic compaction machine (BAUER Maschinen GmbH), and a diesel engine management system (DENSO AUTOMOTIVE Deutschland GmbH). 
This article focuses on the parallelization approach, experiences during parallelization with the applications, and quantitative results reached by simulation, by static WCET analysis with the OTAWA tool, and by measurement-based WCET analysis with the RapiTime tool.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "53", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Tanasa:2016:CAP, author = "Bogdan Tanasa and Unmesh D. Bordoloi and Petru Eles and Zebo Peng", title = "Correlation-Aware Probabilistic Timing Analysis for the Dynamic Segment of {FlexRay}", journal = j-TECS, volume = "15", number = "3", pages = "54:1--54:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2870635", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jul 21 17:18:13 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We propose an analytical framework for probabilistic timing analysis of the event-triggered Dynamic segment of the FlexRay communication protocol. Specifically, our framework computes the Deadline Miss Ratio of each message. The core problem is formulated as a Mixed Integer Linear Program (MILP). Given the intractability of the problem, we also propose several techniques that help to mitigate the running times of our tool. This includes the re-engineering of the problem to run it on GPUs as well as reformulating the MILP itself. Most importantly, we also show how our framework can handle correlations between the queuing events of messages. This is challenging because one cannot apply the convolution operator in the same way as in the case of independent queuing events.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "54", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Yang:2016:BAU, author = "Ming-Chang Yang and Yuan-Hao Chang and Che-Wei Tsao", title = "Byte-Addressable Update Scheme to Minimize the Energy Consumption of {PCM}-Based Storage Systems", journal = j-TECS, volume = "15", number = "3", pages = "55:1--55:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2910590", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jul 21 17:18:13 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In recent years, phase-change memory (PCM) has generated a great deal of interest because of its byte addressability and nonvolatility properties. It is regarded as a good alternative storage medium that can reduce the performance gap between the main memory and the secondary storage in computing systems. However, its high energy consumption on writes is a challenging issue in the design of battery-powered mobile computing systems. To reduce the energy consumption, we exploit the byte addressability and the asymmetric read-write energy/latency of PCM in an energy-efficient update scheme for journaling file systems. We also introduce a concept called the 50\% rule to determine/recommend the best update strategy for block updates. The proposed scheme only writes modified data, instead of the whole updated block, to PCM-based storage devices without extra hardware support. Moreover, it guarantees the sanity/integrity of file systems even if the computing system crashes or there is a power failure during the data update process. We implemented the proposed scheme on the Linux system and conducted a series of experiments to evaluate the scheme. The results are very encouraging.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "55", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hu:2016:EIR, author = "Biao Hu and Kai Huang and Gang Chen and Long Cheng and Alois Knoll", title = "Evaluation and Improvements of Runtime Monitoring Methods for Real-Time Event Streams", journal = j-TECS, volume = "15", number = "3", pages = "56:1--56:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2890503", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jul 21 17:18:13 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Runtime monitoring is of great importance as a safeguard to guarantee the correctness of system runtime behaviors. Two state-of-the-art methods, dynamic counters and $l$-repetitive function, were recently developed to tackle the runtime monitoring for real-time systems. While both are reported to be efficient in monitoring arbitrary events, the monitoring performance between them has not yet been evaluated. This article evaluates both methods in depth, to identify their strengths and weaknesses. New methods are proposed to efficiently monitor the many-to-one connections that are abstracted as AND and OR components on multiple inputs. Representative scenarios are used as our case studies to quantitatively demonstrate the evaluations. Both methods are implemented in hardware FPGA. The timing overhead and resource usages of implementing the two methods are evaluated.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "56", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lu:2016:VCV, author = "Yaojie Lu and Seyedamin Rooholamin and Sotirios G.
Ziavras", title = "Vector Coprocessor Virtualization for Simultaneous Multithreading", journal = j-TECS, volume = "15", number = "3", pages = "57:1--57:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2898364", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jul 21 17:18:13 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Vector coprocessors (VPs), commonly being assigned exclusively to a single thread/core, are not often performance and energy efficient due to mismatches with the vector needs of individual applications. We present in this article an easy-to-implement VP virtualization technique that, when applied, enables a multithreaded VP to simultaneously execute multiple threads of similar or arbitrary vector lengths to achieve improved aggregate utilization. With a vector register file (VRF) virtualization technique invented to dynamically allocate physical vector registers to threads, our VP virtualization approach improves programmer productivity by providing at runtime a distinct physical register name space to each competing thread, thus eliminating the need to solve register-name conflicts statically. We applied our virtualization technique to a multithreaded VP and prototyped an FPGA-based multicore processor system that supports VP sharing as well as power gating for better energy efficiency. Under the dynamic creation of disparate threads, our benchmarking results show impressive VP speedups of up to 333\% and total energy savings of up to 37\% with proper thread scheduling and power gating compared to a similar-sized system that allows VP access to just one thread at a time.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "57", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Seo:2016:HMR, author = "Hwajeong Seo and Zhe Liu and Yasuyuki Nogami and Jongseok Choi and Howon Kim", title = "Hybrid {Montgomery} Reduction", journal = j-TECS, volume = "15", number = "3", pages = "58:1--58:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2890502", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jul 21 17:18:13 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/fparith.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this article, we present a hybrid method to improve the performance of the Montgomery reduction by taking advantage of the Karatsuba technique. We divide the Montgomery reduction into two sub-parts, including one for the conventional Montgomery reduction and the other one for Karatsuba-aided multiplication. This approach reduces the multiplication complexity of $n$-limb Montgomery reduction from $ \theta (n^2 + n)$ to asymptotic complexity $ \theta (7 n^2 / 8 + n)$. Our practical implementation results over an 8-bit microcontroller also show performance enhancements by 11\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "58", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Filippopoulos:2016:IEM, author = "Iason Filippopoulos and Namita Sharma and Francky Catthoor and Per Gunnar Kjeldsberg and Preeti Ranjan Panda", title = "Integrated Exploration Methodology for Data Interleaving and Data-to-Memory Mapping on {SIMD} Architectures", journal = j-TECS, volume = "15", number = "3", pages = "59:1--59:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2894754", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jul 21 17:18:13 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This work presents a methodology for efficient exploration of data interleaving and data-to-memory mapping options for Single Instruction Multiple Data (SIMD) platform architectures. The system architecture consists of a reconfigurable clustered scratch-pad memory and a SIMD functional unit, which performs the same operation on multiple input data in parallel. The memory accesses contribute substantially to the overall energy consumption of an embedded system executing a data intensive task. The scope of this work is the reduction of the overall energy consumption by increasing the utilization of the functional units and decreasing the number of memory accesses. The presented methodology is tested using a number of benchmark applications with holes in their access scheme. Potential gains are calculated based on the energy models, both for the processing and the memory part of the system. The reduction in energy consumption after efficient interleaving and mapping of data is between 40\% and 80\% for the complete system and the studied benchmarks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "59", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ferreira:2016:LRF, author = "Ronaldo R. Ferreira and Gabriel L. Nazar and Jean {Da Rolt} and {\'A}lvaro F. Moreira and Luigi Carro", title = "Live-Out Register Fencing: Interrupt-Triggered Soft Error Correction Based on the Elimination of Register-to-Register Communication", journal = j-TECS, volume = "15", number = "3", pages = "60:1--60:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2873058", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jul 21 17:18:13 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article introduces Live-Out Register Fencing (LoRF), a soft error correction mechanism that uses the novel Spill Register File as a container of checkpointing data. LoRF's Spill Register File holds the values shared among basic blocks in the program, and, coupled with a new compilation strategy, LoRF allows for error correction in the same basic block where the error was detected. In LoRF, error correction is triggered by a hardware interrupt that restores the registers of a basic block from the Spill Register File. After these registers are restored, the basic block where the error was detected can just be re-executed, thus reducing the costs of error recovery. LoRF's error correction policy eliminates the need for expensive architectural support for checkpointing and rollback, reducing the performance overhead of online soft error correction. LoRF relies on both a modified processor architecture and a corresponding compiler. The architecture was implemented in synthesizable VHDL, whereas the compiler was developed as an extension of the LLVM framework. 
Fault injection experiments support an error correction coverage of 99.35\% and a mean performance overhead of 1.33 for the entire life cycle of an error from its occurrence to its elimination from the system.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "60", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Khalid:2016:RHL, author = "Ayesha Khalid and Goutam Paul and Anupam Chattopadhyay and Faezeh Abediostad and Syed Imad Ud Din and Muhammad Hassan and Baishik Biswas and Prasanna Ravi", title = "{RunStream}: a High-Level Rapid Prototyping Framework for Stream Ciphers", journal = j-TECS, volume = "15", number = "3", pages = "61:1--61:??", month = jul, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2891412", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jul 21 17:18:13 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We present RunStream, a rapid prototyping framework for realizing stream cipher implementations based on algorithmic specifications and architectural customizations desired by the users. In the dynamic world of cryptography where newer recommendations are frequently proposed, the need of such tools is imperative. It carries out design validation and generates an optimized software implementation and a synthesizable Register Transfer Level Verilog description. Our framework enables speedy benchmarking against critical resources like area, throughput, power, and latency and allows exploration of alternatives. Using RunStream, we successfully implemented various stream ciphers and benchmarked the quality of results to be at par with published hand-optimized implementations.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "61", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Shukla:2016:ESE, author = "Sandeep K. Shukla", title = "Editorial: Security of Embedded Systems and Cyber Irons --- Embedded Systems for Security", journal = j-TECS, volume = "15", number = "4", pages = "62:1--62:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2976731", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Sep 1 16:03:45 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "62", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Eles:2016:GES, author = "Petru Eles and Rolf Ernst", title = "Guest Editorial for Special Issue of {ESWEEK 2015}", journal = j-TECS, volume = "15", number = "4", pages = "63:1--63:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2968218", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Sep 1 16:03:45 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "63", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{You:2016:VVA, author = "Yi-Ping You and Szu-Chien Chen", title = "{VecRA}: a Vector-Aware Register Allocator for {GPU} Shader Processors", journal = j-TECS, volume = "15", number = "4", pages = "64:1--64:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2961026", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Sep 1 16:03:45 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Graphics processing units (GPUs) are now widely used in embedded systems for manipulating computer graphics and even for general-purpose computation. However, many embedded systems have to manage highly restricted hardware resources in order to achieve high performance or energy efficiency. The number of registers is one of the common limiting factors in an embedded GPU design. Programs that run with a low number of registers may suffer from high register pressure if register allocation is not properly designed, especially on a GPU in which a register is divided into four elements and each element can be accessed separately, because allocating a register for a vector-type variable that does not contain values in all elements wastes register spaces. In this article, we present a vector-aware register allocation framework to improve register utilization on shader architectures. The framework involves two major components: (1) element-based register allocation that allocates registers based on the element requirement of variables and (2) register packing that rearranges elements of registers in order to increase the number of contiguous free elements, thereby keeping more live variables in registers. 
Experimental results on a cycle-approximate simulator showed that the proposed framework decreased 92\% of register spills in total and made 91.7\% of 14 common shader programs spill free. These results indicate an opportunity for energy management of the space that is used for storing spilled variables, with the framework improving the performance by a geometric mean of 8.3\%, 16.3\%, and 29.2\% for general shader processors in which variables are spilled to memory with 5-, 10-, and 20-cycle access latencies, respectively. Furthermore, the reduction in the register requirement of programs enabled another 11 programs with high register pressure to be runnable on a lightweight GPU.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "64", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Liu:2016:ETA, author = "Weichen Liu and Chunhua Xiao", title = "An Efficient Technique of Application Mapping and Scheduling on Real-Time Multiprocessor Systems for Throughput Optimization", journal = j-TECS, volume = "15", number = "4", pages = "65:1--65:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2950051", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Sep 1 16:03:45 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Multiprocessor systems are becoming ubiquitous in today's embedded systems design. In this article, we address the problem of mapping an application represented by a Homogeneous Synchronous Dataflow (HSDF) graph onto a real-time multiprocessor platform with the objective of maximizing total throughput. We propose that the optimal solution to the problem is composed of three components: actor-to-processor mapping, retiming, and actor ordering on each processor. 
The entire problem is systematically modeled into a Boolean Satisfiability (SAT) problem such that the optimal solution can be guaranteed theoretically. In order to explore the vast solution space more efficiently, we develop a specific HSDF theory solver based on the special characteristics of the timed HSDF, and integrate it into the general search framework of the SAT solver. Two alternative integration methods based on branch-and-bound are presented to achieve early branch pruning in the search space; thus, the scalability is greatly improved. Extensive performance evaluation on synthetic examples and a case study on the realistic H.264 Video Decoder show that our approach provides as much as 76.9\% throughput improvement, and is scalable to industry-sized applications.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "65", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Duraisamy:2016:HPE, author = "Karthi Duraisamy and Hao Lu and Partha Pratim Pande and Ananth Kalyanaraman", title = "High-Performance and Energy-Efficient Network-on-Chip Architectures for Graph Analytics", journal = j-TECS, volume = "15", number = "4", pages = "66:1--66:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2961027", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Sep 1 16:03:45 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "With its applicability spanning numerous data-driven fields, the implementation of graph analytics on multicore platforms is gaining momentum. One of the most important components of a multicore chip is its communication backbone. 
Due to inherent irregularities in data movements manifested by graph-based applications, it is essential to design efficient on-chip interconnection architectures for multicore chips performing graph analytics. In this article, we present a detailed analysis of the traffic patterns generated by graph-based applications when mapped to multicore chips. Based on this analysis, we explore the design-space for the Network-on-Chip (NoC) architecture to enable an efficient implementation of graph analytics. We principally consider three types of NoC architectures, viz., traditional mesh, small-world, and high-radix networks. We demonstrate that the small-world-network-enabled wireless NoC (WiNoC) is the most suitable platform for executing the considered graph applications. The WiNoC achieves an average of 38\% and 18\% full-system Energy Delay Product savings compared to wireline-mesh and high-radix NoCs, respectively.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "66", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kriebel:2016:RAA, author = "Florian Kriebel and Semeen Rehman and Arun Subramaniyan and Segnon Jean Bruno Ahandagbe and Muhammad Shafique and J{\"o}rg Henkel", title = "Reliability-Aware Adaptations for Shared Last-Level Caches in Multi-Cores", journal = j-TECS, volume = "15", number = "4", pages = "67:1--67:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2961059", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Sep 1 16:03:45 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "On account of their large footprint, on-chip last-level caches in multi-core systems are one of the most vulnerable components to soft errors. 
However, vulnerability to soft errors highly depends on the configuration and parameters of the last-level cache, especially when executing different applications concurrently. In this article we propose a novel reliability-aware reconfigurable last-level cache architecture (R$^2$ Cache) and cache vulnerability model for multi-cores. R$^2$ Cache supports various reliability-wise efficient cache configurations (i.e., cache parameter selection and cache partitioning) for different concurrently executing applications. The proposed vulnerability model takes into account the vulnerability of both the data and tag arrays as well as the active cache area for applications in different execution phases. To enable runtime adaptations, we introduce a lightweight online vulnerability predictor that exploits the knowledge of performance metrics like number of L2 misses to accurately estimate the cache vulnerability to soft errors. Based on the predicted vulnerabilities of different concurrently executing applications in the current execution epoch, our runtime reliability manager reconfigures the cache such that, for the next execution epoch, the total vulnerability for all concurrently executing applications is minimized under user-provided tolerable performance/energy overheads. In scenarios where single-bit error correction for cache lines may be afforded, vulnerability-aware reconfigurations can be leveraged to increase the reliability of the last-level cache against multi-bit errors. Compared to state-of-the-art vulnerability-minimizing and reconfigurable caches, the proposed architecture provides 35.27\% and 23.42\% vulnerability savings, respectively, when averaged across numerous experiments, while reducing the vulnerability by more than 65\% and 60\%, respectively, for selected applications and application phases.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "67", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Spasic:2016:IHR, author = "Jelena Spasic and Di Liu and Emanuele Cannella and Todor Stefanov", title = "On the Improved Hard Real-Time Scheduling of Cyclo-Static Dataflow", journal = j-TECS, volume = "15", number = "4", pages = "68:1--68:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2932188", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Sep 1 16:03:45 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Recently, it has been shown that the hard real-time scheduling theory can be applied to streaming applications modeled as acyclic Cyclo-Static Dataflow (CSDF) graphs. However, this recent approach is not always efficient in terms of throughput and processor utilization. Therefore, in this article, we propose an improved hard real-time scheduling approach to schedule streaming applications modeled as acyclic CSDF graphs on a Multiprocessor System-on-Chip (MPSoC) platform. The proposed approach converts each actor in a CSDF graph to a set of real-time periodic tasks. The conversion enables application of many hard real-time scheduling algorithms that offer fast calculation of the required number of processors for scheduling the tasks. In addition, we propose a method to reduce the graph latency when the converted tasks are scheduled as real-time periodic tasks. We evaluate the performance and time complexity of our approach in comparison to several existing scheduling approaches. 
Experiments on a set of real-life streaming applications demonstrate that our approach (1) results in systems with higher throughput and better processor utilization in comparison to the existing hard real-time scheduling approach for CSDF graphs, while requiring comparable time for the system derivation; (2) delivers shorter application latency by applying the proposed method for graph latency reduction while providing better throughput and processor utilization when compared to the existing hard real-time scheduling approach; (3) gives the same throughput as the existing periodic scheduling approach for CSDF graphs, but requires much shorter time to derive the task schedule and tasks' parameters (periods, start times, and so on); and (4) gives the throughput that is equal to or very close to the maximum achievable throughput of an application obtained via self-timed scheduling, but requires much shorter time to derive the schedule. The total time needed for the proposed conversion approach and the calculation of the minimum number of processors needed to schedule the tasks and the calculation of the size of communication buffers between tasks is in the range of seconds.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "68", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Allamigeon:2016:SAM, author = "Xavier Allamigeon and St{\'e}phane Gaubert and Nikolas Stott and {\'E}ric Goubault and Sylvie Putot", title = "A Scalable Algebraic Method to Infer Quadratic Invariants of Switched Systems", journal = j-TECS, volume = "15", number = "4", pages = "69:1--69:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2932187", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Sep 1 16:03:45 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We present a new numerical abstract domain based on ellipsoids designed for the formal verification of switched linear systems. Unlike the existing approaches, this domain does not rely on a user-given template. We overcome the difficulty that ellipsoids do not have a lattice structure by exhibiting a canonical operator over-approximating the union. This operator is the only one that permits the performance of analyses that are invariant with respect to a linear transformation of state variables. It provides the minimum volume ellipsoid enclosing two given ellipsoids. We show that it can be computed in $O(n^3)$ elementary algebraic operations. We finally develop a fast nonlinear power-type algorithm, which allows one to determine sound quadratic invariants on switched systems in a tractable way, by solving fixed-point problems over the space of ellipsoids. We test our approach on several benchmarks, and compare it with the standard techniques based on linear matrix inequalities, showing an important speedup on typical instances.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "69", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wu:2016:SAR, author = "Xueguang Wu and Liqian Chen and Antoine Min{\'e} and Wei Dong and Ji Wang", title = "Static Analysis of Runtime Errors in Interrupt-Driven Programs via Sequentialization", journal = j-TECS, volume = "15", number = "4", pages = "70:1--70:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2914789", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Sep 1 16:03:45 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Embedded software often involves intensive numerical computations and suffers from a number of runtime errors. The technique of numerical static analysis is of practical importance for checking the correctness of embedded software. However, most of the existing approaches of numerical static analysis consider sequential programs, while interrupts are a commonly used facility that introduces concurrency in embedded systems. Therefore, a numerical static analysis approach is highly desired for embedded software with interrupts. In this article, we propose a static analysis approach specifically for interrupt-driven programs based on sequentialization techniques. We present a method to sequentialize interrupt-driven programs into nondeterministic sequential programs according to the semantics of interrupts. The key benefit of using sequentialization is the ability to leverage the power of state-of-the-art analysis and verification techniques for sequential programs to analyze interrupt-driven programs, for example, the power of numerical abstract interpretation to analyze numerical properties of the sequentialized programs. 
Furthermore, to improve the analysis precision and scalability, we design specific abstract domains to analyze sequentialized interrupt-driven programs by considering their specific features. Finally, we present encouraging experimental results obtained by our prototype implementation.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "70", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Baudart:2016:LTT, author = "Guillaume Baudart and Albert Benveniste and Timothy Bourke", title = "Loosely Time-Triggered Architectures: Improvements and Comparisons", journal = j-TECS, volume = "15", number = "4", pages = "71:1--71:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2932189", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Sep 1 16:03:45 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Loosely Time-Triggered Architectures (LTTAs) are a proposal for constructing distributed embedded control systems. They build on the quasi-periodic architecture, where computing units execute nearly periodically, by adding a thin layer of middleware that facilitates the implementation of synchronous applications. In this article, we show how the deployment of a synchronous application on a quasi-periodic architecture can be modeled using a synchronous formalism. Then we detail two protocols, Back-Pressure LTTA, reminiscent of elastic circuits, and Time-Based LTTA, based on waiting. Compared to previous work, we present controller models that can be compiled for execution, a simplified version of the Time-Based protocol and optimizations for systems using broadcast communication. We also compare the LTTA approach with architectures based on clock synchronization.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "71", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Shen:2016:UAS, author = "Jie Shen and Yingjue Cai and Yang Ren and Xiao Yang", title = "A Universal Application Storage System Based on Smart Card", journal = j-TECS, volume = "15", number = "4", pages = "72:1--72:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2886116", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Sep 1 16:03:45 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Nowadays, electronic commerce (e-commerce) has brought facilitation to people's daily lives. Smart-card-based systems are widely used as an implementation, where smart cards act as a secure carrier for small-sized data. However, most of these systems are developed and managed by each service provider individually and repeatedly, which causes both unnecessary work and difficulties in future maintenance. Besides, advantages of smart card technology are not full-fledged for the lack of enough consideration in flexibility and security. To propose a solution, this article presents a Universal Application Storage System, including card side, terminal side, and back-end system. The card side provides a universal and secured infrastructure for data storage, where data are organized and stored in a card file system with several security mechanisms. In the terminal side, a framework for accessing various forms of secure element is presented to simplify the procedures involved in manipulating smart cards. Through this framework, the back-end system is able to establish a direct connection to the card, and performs authorized operations by exchanging commands in a secure channel. 
The validity of the proposed system is verified at the end of this article, illustrated by an e-coupon system.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "72", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hassan:2016:HSB, author = "Hadeer A. Hassan and Sameh A. Salem and Ahmed M. Mostafa and E. M. Saad", title = "Harmonic Segment-Based Semi-Partitioning Scheduling on Multi-Core Real-Time Systems", journal = j-TECS, volume = "15", number = "4", pages = "73:1--73:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2933388", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Sep 1 16:03:45 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Nowadays, the issue of scheduling multi-core real-time systems has become the focus of such research in industrial, biomedical, military, and other fields. As a consequence, a new semi-partitioning algorithm that uses a static Rate-Monotonic criterion to schedule real-time tasks on multi-core platforms is proposed. The improvement in the performance of real-time systems is achieved by exploiting the fact that the utilization boundary of a task set increases to fully utilize the processors if the periods of tasks have harmonic nature among each other. Experimental results on randomly generated datasets and real-world datasets show that the proposed algorithm inevitably outperforms other competitive algorithms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "73", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wu:2016:JJO, author = "Chin-Hsien Wu and Syuan-An Chen", title = "{JOM}: a Joint Operation Mechanism for {NAND} Flash Memory", journal = j-TECS, volume = "15", number = "4", pages = "74:1--74:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2915916", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Sep 1 16:03:45 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In the storage systems of NAND flash memory, an intermediate software called a Flash Translation Layer (FTL) is adopted to hide the characteristics of NAND flash memory and provide efficient management for NAND flash memory. Current flash translation layers can be classified into a page-mapping FTL, a block-mapping FTL, and a hybrid-mapping FTL. In order to utilize the advantages of the page-mapping FTL and the block-mapping FTL, the hybrid-mapping FTL is proposed to store data to the appropriate mapping mechanism by switching the mapping information between the page-mapping mechanism and the block-mapping mechanism. In the article, we propose a joint operation mechanism to rethink the advantages of the page-mapping FTL, the block-mapping FTL, and the hybrid-mapping FTL. With the joint operation mechanism, a flash translation layer can consider the main memory requirements, improve the system performance, and reduce the garbage collection overhead. The experimental results show that the proposed joint operation mechanism can achieve the goal under realistic workloads and benchmarks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "74", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chiew:2016:NEI, author = "Wei Ming Chiew and Feng Lin and Hock Soon Seah", title = "A Novel Embedded Interpolation Algorithm with Negative Squared Distance for Real-Time Endomicroscopy", journal = j-TECS, volume = "15", number = "4", pages = "75:1--75:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2905367", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Sep 1 16:03:45 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Interpolation is the most executed operation and one of the main bottlenecks in embedded imaging, registration, and rendering systems. Existing methods either lack parallelization and scalability capabilities or are too computationally complex to execute efficiently. Acknowledging that improving execution time leads to degradation in image quality, we formulate a novel Negative Squared Distance (NSD) interpolation method that exhibits excellent performance by exploiting Look-Up Table (LUT) optimization for Field Programmable Gate Array (FPGA) speedup, with a balanced trade-off in quality in our embedded endomicroscopic imaging system. Quantitative analysis on performance and resource utilization of NSD against existing methods is reported through an implementation on a Xilinx ML605 platform. Functional validation using practical image resizing and rotation applications to compare qualitative performance against existing algorithms is performed and presented with visual and numerical results. Our method is shown to have a smaller design size and produces a maximum throughput of over twofold against trilinear interpolation with on-par image quality as the baseline method.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "75", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lin:2016:CFQ, author = "Chun-Han Lin and Chih-Kai Kang and Pi-Cheng Hsiu", title = "{CURA}: a Framework for Quality-Retaining Power Saving on Mobile {OLED} Displays", journal = j-TECS, volume = "15", number = "4", pages = "76:1--76:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2909875", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Sep 1 16:03:45 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Organic Light-Emitting Diode (OLED) technology is regarded as a promising alternative to mobile displays. In this article, we introduce the design, algorithm, and implementation of a novel framework called CURA for quality-retaining power saving on mobile OLED displays. First, we link human visual attention to OLED power saving and model the OLED image scaling optimization problem. The objective is to minimize the power required to display an image without adversely impacting the user's visual experience. Then, we present the algorithm used to solve the modeled problem, and prove its optimality even without an accurate power model. Finally, based on the framework, we implement two practical applications on a commercial OLED mobile tablet. The results of experiments conducted on the tablet with real images demonstrate that CURA can reduce significant OLED power consumption while retaining the visual quality of images.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "76", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hester:2016:PCB, author = "Josiah Hester and Nicole Tobias and Amir Rahmati and Lanny Sitanayah and Daniel Holcomb and Kevin Fu and Wayne P. 
Burleson and Jacob Sorber", title = "Persistent Clocks for Batteryless Sensing Devices", journal = j-TECS, volume = "15", number = "4", pages = "77:1--77:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2903140", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Sep 1 16:03:45 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Sensing platforms are becoming batteryless to enable the vision of the Internet of Things, where trillions of devices collect data, interact with each other, and interact with people. However, these batteryless sensing platforms --- that rely purely on energy harvesting --- are rarely able to maintain a sense of time after a power failure. This makes working with sensor data that is time sensitive especially difficult. We propose two novel, zero-power timekeepers that use remanence decay to measure the time elapsed between power failures. Our approaches compute the elapsed time from the amount of decay of a capacitive device, either on-chip Static Random-Access Memory (SRAM) or a dedicated capacitor. This enables hourglass-like timers that give intermittently powered sensing devices a persistent sense of time. Our evaluation shows that applications using either timekeeper can keep time accurately through power failures as long as 45s with low overhead.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "77", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Xi:2016:FSS, author = "Kai Xi and Jiankun Hu and B. V. K.
Vijaya Kumar", title = "{FE-SViT}: a {SViT}-Based Fuzzy Extractor Framework", journal = j-TECS, volume = "15", number = "4", pages = "78:1--78:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2930669", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Sep 1 16:03:45 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "As a promising bio-cryptographic technique, the fuzzy extractor seamlessly binds biometrics and cryptography for template protection and key generation. However, most existing methods hardly solve the following issues simultaneously: (1) Fingerprint registration, (2) Verification accuracy, (3) Security strength, and (4) Computational efficiency. In this article, we introduce a bio-crypto-oriented fingerprint verification scheme --- Selective Vertex-indexed Triangulation (SViT) which maps minutia global topology to local triangulation with minimum information loss. Then, a SViT-based fuzzy extractor framework (FE-SViT) is proposed and high verification accuracy is achieved. The FE-SViT is highly parallelizable and efficient which makes it suitable for embedded devices.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "78", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Olivier:2016:MEP, author = "Pierre Olivier and Jalil Boukhobza and Eric Senn and Hamza Ouarnoughi", title = "A Methodology for Estimating Performance and Power Consumption of Embedded Flash File Systems", journal = j-TECS, volume = "15", number = "4", pages = "79:1--79:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2903139", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Sep 1 16:03:45 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In the embedded systems domain, obtaining performance and power consumption estimations is extremely valuable in numerous cases. This is particularly true during the design stage, as designers of complex embedded systems face an increasingly large design space. Secondary storage is a well-known performance bottleneck and has also been reported as an important factor of power consumption. Flash memory is the main secondary storage media in an embedded system and exhibits specific constraints in its usage. One popular way to manage these constraints is to use dedicated Flash File Systems (FFS). In this article, we propose a methodology to estimate the performance and power consumption of applicative I/Os on an FFS-based storage system within embedded Linux. The methodology is divided into three sequential steps. In the exploration phase, the main factors of an FFS storage system impacting performance and power consumption are identified. In the modeling phase, this impact is formalized into models. Finally, in the last phase, the models are implemented in a simulator named OpenFlash. OpenFlash allows obtaining performance and power consumption estimations for an applicative workload processed by the Linux FFS storage stack on an embedded platform. 
The simulator is validated against real measurements and the estimation error stays below 10\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "79", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Shukla:2016:EDP, author = "Sandeep K. Shukla", title = "Editorial: Distributed Public Ledgers and Block Chains --- What Good Are They for Embedded Systems?", journal = j-TECS, volume = "16", number = "1", pages = "1:1--1:2", month = nov, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/3001902", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Nov 3 16:48:38 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "1", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", remark = "Special issue on VIPES, special issue on ICESS2015 and regular papers.", } @Article{Castrillon:2016:GES, author = "Jeronimo Castrillon and Cristina Silvano", title = "Guest Editorial: Special Issue on {Virtual Prototyping of Parallel and Embedded Systems (ViPES)}", journal = j-TECS, volume = "16", number = "1", pages = "2:1--2:??", month = nov, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2991466", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Nov 3 16:48:38 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "2", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", remark = "Special issue on VIPES, special issue on ICESS2015 and regular papers.", } @Article{Bortolotti:2016:VRT, author = "Daniele Bortolotti and Andrea Marongiu and Luca Benini", title = "{VirtualSoC}: a Research Tool for Modern {MPSoCs}", journal = j-TECS, volume = "16", number = "1", pages = "3:1--3:??", month = nov, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2930665", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Nov 3 16:48:38 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Architectural heterogeneity has proven to be an effective design paradigm to cope with an ever-increasing demand for computational power within tight energy budgets, in virtually every computing domain. Programmable manycore accelerators are currently widely used not only in high-performance computing systems, but also in embedded devices, in which they operate as coprocessors under the control of a general-purpose CPU (the host processor). Clearly, such powerful hardware architectures are paired with sophisticated and complex software ecosystems, composed of operating systems, programming models plus associated runtime engines, and increasingly complex user applications with related libraries. System modeling has always played a key role in early architectural exploration or software development when the real hardware is not available. The necessity of efficiently coping with the huge HW/SW design space provided by the described heterogeneous Systems on Chip (SoCs) calls for advanced full-system simulation methodologies and tools, capable of assessing various metrics for the functional and nonfunctional properties of the target system. 
In this article, we describe VirtualSoC, a simulation tool targeting the full-system simulation of massively parallel heterogeneous SoCs. We also describe how VirtualSoC has been successfully adopted in several research projects.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "3", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", remark = "Special issue on VIPES, special issue on ICESS2015 and regular papers.", } @Article{Wehner:2016:SRM, author = "Philipp Wehner and Jens Rettkowski and Tobias Kalb and Diana G{\"o}hringer", title = "Simulating Reconfigurable Multiprocessor Systems-on-Chip with {MPSoCSim}", journal = j-TECS, volume = "16", number = "1", pages = "4:1--4:??", month = nov, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2972952", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Nov 3 16:48:38 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Upcoming reconfigurable Multiprocessor Systems-on-Chip (MPSoCs) present new challenges for the design and early estimation of technology requirements due to their runtime adaptive hardware architecture. The usage of simulators offers capabilities to overcome these issues. In this article, MPSoCSim, a SystemC simulator for Network-on-Chip (NoC) based MPSoCs is extended to support the simulation of reconfigurable MPSoCs. Processors, such as ARM and MicroBlaze, and peripheral models used within the virtual platform are provided by Imperas/OVP and attached to the NoC. Moreover, traffic generators are available to analyze the system. The virtual platform currently supports mesh topology with wormhole switching and several routing algorithms such as XY-, a minimal West-First algorithm, and an adaptive West-First algorithm. 
Amongst the impact of routing algorithms regarding performance, reconfiguration processes can be examined using the presented simulator. A mechanism for dynamic partial reconfiguration is implemented that is oriented towards the reconfiguration scheme on real FPGA platforms. It includes the simulation of the undefined behavior of the hardware region during reconfiguration and allows the adjustment of parameters. During runtime, dynamic partial reconfiguration interfaces are used to connect the Network-on-Chip infrastructure with reconfigurable regions. The configuration access ports can be modeled by the controller for the dynamic partial reconfiguration in form of an application programming interface. An additional SystemC component enables the readout of simulation time from within the application. For evaluation of the simulator timing and power consumption of the simulated hardware are estimated and compared with a real hardware implementation on a Xilinx Zynq FPGA. The comparison shows that the simulator improves the development of reconfigurable MPSoCs by early estimation of system requirements. The power estimations show a maximum deviation of 9mW at 1.9W total power consumption.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "4", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", remark = "Special issue on VIPES, special issue on ICESS2015 and regular papers.", } @Article{Sauer:2016:LFD, author = "Christian Sauer and Hans-Peter Loeb", title = "A Lightweight Framework for the Dynamic Creation and Configuration of Virtual Platforms in {SystemC}", journal = j-TECS, volume = "16", number = "1", pages = "5:1--5:??", month = nov, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2983626", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Nov 3 16:48:38 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Virtual prototypes leverage SystemC/TLM for simulating programmable platforms comprising hundreds of modules. Their efficient creation and configuration is vital for acceptable turnaround times, for example, during performance exploration or software development. Therefore, our lightweight framework provides a factory that creates designs from abstract descriptions of module instances, properties, and connections. Modules mark properties as creation or runtime parameters. The resulting generic design descriptions are usable by non-experts and enable front-ends. The infrastructure is a small C++ library with only 1,350 lines of code that can be combined with existing SystemC/TLM models and simulation kernels. An industrial case study of a complex multiprocessor SoC shows a distinct productivity gain.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "5", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", remark = "Special issue on VIPES, special issue on ICESS2015 and regular papers.", } @Article{Meyer:2016:SSC, author = "Rolf Meyer and Jan Wagner and Bastian Farkas and Sven Horsinka and Patrick Siegl and Rainer Buchty and Mladen Berekovic", title = "A Scriptable Standard-Compliant Reporting and Logging Framework for {SystemC}", journal = j-TECS, volume = "16", number = "1", pages = "6:1--6:??", month = nov, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2983623", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Nov 3 16:48:38 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "With the ever-increasing complexity of digital designs, debugging and evaluation face likewise increasing challenges. While recent advances in hardware/software co-simulation have been made, solutions for corresponding debugging and evaluation did not mature and improve in a similar fashion. In this article, we present a dedicated solution to ease the debugging and evaluation efforts, particularly focusing on full-system simulation. Improving significantly over existing solutions, the presented approach features a standards-compliant powerful and flexible method of deriving, logging, and filtering detailed status information from SystemC-based models. At the core of this approach are flexible scripting capabilities that may change all logging parameters during runtime, thus not requiring re-compiling the to-be-simulated model, as in many competing solutions. The approach is tested and benchmarked with a real-world full-system example, demonstrating the overall benefits. 
The presented solution is published as open source via github (see text) and, by strictly adhering to existing standards, is generally compatible with existing SystemC simulation environments.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "6", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", remark = "Special issue on VIPES, special issue on ICESS2015 and regular papers.", } @Article{Murillo:2016:MSD, author = "Luis Gabriel Murillo and R{\'o}bert Lajos B{\"u}cs and Rainer Leupers and Gerd Ascheid", title = "{MPSoC} Software Debugging on Virtual Platforms via Execution Control with Event Graphs", journal = j-TECS, volume = "16", number = "1", pages = "7:1--7:??", month = nov, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2950052", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Nov 3 16:48:38 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Virtual Platforms (VPs) are advantageous to develop and debug complex software for multi- and many-processor systems-on-chip (MPSoCs). VPs provide unrivaled controllability and visibility of the target, which can be exploited to examine bugs that cannot be reproduced easily in real hardware (e.g., bugs originating from races or happening during a processor stand-by state). However, VPs as employed in practice for debugging are generally underutilized. The accompanying debug ecosystem is based mostly on traditional tools, such as step-based debuggers and traces, that fall short to address the enormous complexity of modern MPSoCs and their parallel software. Finding a bug is still largely left to the developer's experience and intuition, using manual means rather than automated or systematic solutions that exploit the controllability and visibility of VPs.
Profiting from VPs for MPSoC software debugging is an open question. To bridge this gap, this article presents a novel framework for debug visualization and execution control that, relying on the many benefits of VPs, helps to identify and test possible concurrency-related bug scenarios. The framework allows examining and steering the target system by manipulating an abstract graph that highlights relevant inter-component interactions and dependencies. The proposed framework reduces the effort required to understand complex concurrency patterns and helps to expose bugs. Its efficacy is demonstrated on (i) a shared memory symmetric multi-processing platform executing Linux and parallel benchmarks, and (ii) a distributed automotive system for driver assistance applications.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "7", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", remark = "Special issue on VIPES, special issue on ICESS2015 and regular papers.", } @Article{Sotiriou-Xanthopoulos:2016:FIA, author = "Efstathios Sotiriou-Xanthopoulos and Sotirios Xydis and Kostas Siozios and George Economakos and Dimitrios Soudris", title = "A Framework for Interconnection-Aware Domain-Specific Many-Accelerator Synthesis", journal = j-TECS, volume = "16", number = "1", pages = "8:1--8:??", month = nov, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2983624", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Nov 3 16:48:38 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Many-accelerator Systems-on-Chip (SoC) have recently emerged as a promising platform paradigm that combines parallelization with heterogeneity, in order to cover the increasing demands for high performance and energy efficiency. 
To exploit the full potential of many-accelerator systems, automated design verification and analysis frameworks are required, targeted to both computational and interconnection optimization. Accurate simulation of interconnection schemes should use real stimuli, which are produced from fully functional nodes, requiring the prototyping of the processing elements and memories of the many-accelerator system. In this article, we argue that the Hierarchical Network-on-Chip (HNoC) scheme forms a very promising solution for many-accelerator systems in terms of scalability and data-congestion minimization. We present a parameterizable SystemC prototyping framework for HNoCs, targeted to domain-specific many-accelerator systems. The framework supports the prototyping of processing elements, memory modules, and underlying interconnection infrastructure, while it provides an API for their easy integration to the HNoC. Finally, it enables holistic system simulation using real node data. Using as a case study a many-accelerator system of an MRI pipeline, an analysis on the proposed framework is presented to demonstrate the impact of the system parameters on the system. Through extensive experimental analysis, we show the superiority of HNoC schemes in comparison to typical interconnection architectures. Finally, we show that, adopting the proposed many-accelerator design flow, significant performance improvements are achieved, from $ 1.2 \times $ up to $ 26 \times $, as compared to an x86 software implementation of the MRI pipeline.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "8", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", remark = "Special issue on VIPES, special issue on ICESS2015 and regular papers.", } @Article{Zhu:2016:GES, author = "Dakai Zhu and Meikang Qiu and Samarjit Chakraborty", title = "Guest Editorial: Special Issue on Emerging Technologies in Embedded Software and Systems", journal = j-TECS, volume = "16", number = "1", pages = "9:1--9:??", month = nov, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2991464", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Nov 3 16:48:38 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "9", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", remark = "Special issue on VIPES, special issue on ICESS2015 and regular papers.", } @Article{Jayakumar:2016:SMV, author = "Hrishikesh Jayakumar and Arnab Raha and Vijay Raghunathan", title = "Sleep-Mode Voltage Scaling: Enabling {SRAM} Data Retention at Ultra-Low Power in Embedded Microcontrollers", journal = j-TECS, volume = "16", number = "1", pages = "10:1--10:??", month = nov, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2950054", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Nov 3 16:48:38 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In heavily duty-cycled embedded systems, the energy consumed by the microcontroller in idle mode is often the bottleneck for battery lifetime. 
Existing solutions address this problem by placing the microcontroller in a low-power (sleep) mode when idle and preserving application state either by retaining the data in situ in Static Random Access Memory (SRAM) or by checkpointing it to Flash. However, both of these approaches have notable drawbacks. In situ data retention requires the SRAM to remain powered in sleep mode, while checkpointing to Flash involves significant energy and time overheads. This article proposes a new ultra-low-power sleep mode for microcontrollers that overcomes the limitations of both of these approaches. Our technique, Hypnos, is based on the key observation that the on-chip SRAM in a microcontroller exhibits 100\% data retention even at a much lower supply voltage (as much as $ 10 \times $ lower) than the typical operating voltage of the microcontroller. Hypnos exploits this observation by performing extreme voltage scaling when the microcontroller is in sleep mode. We implement and evaluate Hypnos for the TI MSP430G2452 microcontroller and show that the Microcontroller (MCU) draws only 26nA in the proposed sleep mode, which is $ 4 \times $ lower than a baseline sleep mode that preserves SRAM contents. Further, to reduce the overheads associated with performing the voltage scaling, we propose the use of an energy harvesting source for providing the scaled supply voltage and demonstrate (using a light sensing photodiode) that the current consumption in the proposed sleep mode can be reduced to 1nA, which is $ 100 \times $ lower than the current consumption in the baseline low-power mode. We also show that the decrease in sleep-mode power consumption translates to a reduction in application-level energy consumption by as much as $ 6.45 \times $. By decreasing the average power consumption to such minuscule levels, Hypnos takes a significant step forward in making perpetual systems a reality through the use of energy harvesting.", acknowledgement = ack-nhfb, ajournal = "ACM Trans.
Embed. Comput. Syst.", articleno = "10", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", remark = "Special issue on VIPES, special issue on ICESS2015 and regular papers.", } @Article{Marz:2016:RPC, author = "Stephen Marz and Brad {Vander Zanden}", title = "Reducing Power Consumption and Latency in Mobile Devices Using an Event Stream Model", journal = j-TECS, volume = "16", number = "1", pages = "11:1--11:??", month = nov, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2964203", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Nov 3 16:48:38 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Most consumer-based mobile devices use asynchronous events to awaken apps. Currently, event handling is implemented in either an application or an application framework such as Java's virtual machine (VM) or Microsoft's {.NET}, and it uses a ``polling loop'' that periodically queries an event queue to determine if an event has occurred. These loops must awaken the process, check for an event, and then put the process back to sleep many times per second. This constant arousal prevents the CPU from being put into a deep sleep state, which increases power consumption. Additionally, the process cannot check for events while it sleeps, and this delay in handling events increases latency, which is the time that elapses between when an event occurs and when the application responds to the event. We call this model of event handling a ``pull'' model because it needs to query hardware devices or software queues in order to ``pull'' events from them. Recent advances in input devices support direct, informative interrupts to the kernel when an event occurs. This allows us to develop a much more efficient event-handling model called the ``Event Stream Model'' (ESM). 
This model is a push model that allows a process to sleep as long as no event occurs but then immediately awakens a process when an event occurs. This model eliminates the polling loop, thus eliminating latency-inducing sleep between polls and reducing unnecessary power consumption. To work properly, the ESM model must be implemented in the kernel rather than in the application. In this article, we describe how we implemented the ESM model in Android operating system (OS). Our results show that with the event stream model, power consumption is reduced by up to 23.8\% in certain circumstances, and latency is reduced by an average of 13.6ms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "11", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", remark = "Special issue on VIPES, special issue on ICESS2015 and regular papers.", } @Article{Chen:2016:ICA, author = "Renhai Chen and Yi Wang and Jingtong Hu and Duo Liu and Zili Shao and Yong Guan", title = "Image-Content-Aware {I/O} Optimization for Mobile Virtualization", journal = j-TECS, volume = "16", number = "1", pages = "12:1--12:??", month = nov, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2950059", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Nov 3 16:48:38 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Mobile virtualization introduces extra layers in software stacks, which leads to performance degradation. Notably, each I/O operation has to pass through several software layers to reach the NAND-flash-based storage systems. This article targets at optimizing I/O for mobile virtualization, since I/O becomes one of major performance bottlenecks that seriously affects the performance of mobile devices. Among all the I/O operations, a large percentage is to update metadata. 
Frequently updated metadata not only degrade overall I/O performance but also severely reduce flash memory lifetime. In this article, we propose a novel I/O optimization technique to identify the metadata of a guest file system that is stored in a virtual machine image file and frequently updated. Then, these metadata are stored in a small additional non-volatile memory (NVM), which is faster and more endurable to greatly improve flash memory's performance and lifetime. To the best of our knowledge, this is the first work to identify the file system metadata from regular data in a guest OS image file with NVM optimization. The proposed scheme is evaluated on a real hardware embedded platform. The experimental results show that the proposed techniques can improve write performance to 45.21\% in mobile devices with virtualization.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "12", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", remark = "Special issue on VIPES, special issue on ICESS2015 and regular papers.", } @Article{Gu:2016:CPP, author = "Zonghua Gu and Chao Wang and Haibo Zeng", title = "Cache-Partitioned Preemption Threshold Scheduling", journal = j-TECS, volume = "16", number = "1", pages = "13:1--13:??", month = nov, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2950057", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Nov 3 16:48:38 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "For preemptive scheduling with shared cache, different tasks may cause interference in the shared cache, leading to Cache-Related Preemption Overhead (CRPD). Cache partitioning can be used to reduce or eliminate CRPD. 
We propose integration of cache partitioning and Preemption Threshold Scheduling to optimize schedulability for both Fixed-Priority and Earliest Deadline First scheduling algorithms on a uniprocessor. We let each subset of tasks assigned the same cache partition be a nonpreemptive group by assigning the same preemption threshold to them, which eliminates CRPD both within each cache partition and between different cache partitions.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "13", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", remark = "Special issue on VIPES, special issue on ICESS2015 and regular papers.", } @Article{Hu:2016:AWM, author = "Biao Hu and Kai Huang and Gang Chen and Long Cheng and Alois Knoll", title = "Adaptive Workload Management in Mixed-Criticality Systems", journal = j-TECS, volume = "16", number = "1", pages = "14:1--14:??", month = nov, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2950058", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Nov 3 16:48:38 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Due to the efficient resource usage of integrating tasks with different criticality onto a shared platform, the integration with mixed-criticality tasks is becoming an increasingly important trend in the design of real-time systems. One challenge in such a mixed-criticality system is to maximize the service for low-critical tasks, while meeting the timing constraints of high-critical tasks. In this article, we investigate how to adaptively manage the low-critical workload during runtime to meet both goals, that is, providing the service for low-critical tasks as much as possible and guaranteeing the hard real-time requirements for high-critical tasks. 
Unlike previous methods, which enforce an offline bound towards the low-critical workload, runtime adaptation approaches are proposed in which the incoming workload of low-critical tasks is adaptively regulated by considering the actual demand of high-critical tasks. This actual demand of the high-critical tasks, in turn, is adaptively updated using their historical arrival information. Based on this adaptation scheme, two scheduling policies---the priority-adjustment policy and the workload-shaping policy---are proposed to do the workload management. In order to reduce online management overhead, a lightweight scheme with $ O (n \cdot \log (n)) $ complexity is developed. Extensive simulation results are presented to demonstrate the effectiveness of our proposed workload management approaches.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "14", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", remark = "Special issue on VIPES, special issue on ICESS2015 and regular papers.", } @Article{Tuncali:2016:APM, author = "Cumhur Erkan Tuncali and Georgios Fainekos and Yann-Hang Lee", title = "Automatic Parallelization of Multirate Block Diagrams of Control Systems on Multicore Platforms", journal = j-TECS, volume = "16", number = "1", pages = "15:1--15:??", month = nov, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2950055", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Nov 3 16:48:38 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article addresses the problem of parallelizing model block diagrams for real-time embedded applications on multicore architectures. We describe a Mixed Integer Linear Programming formulation for finding a feasible mapping of the blocks to different CPU cores.
For single-rate models, we use an objective function that minimizes the overall worst-case execution time. We introduce a set of heuristics to solve the problem for large models in a reasonable time. For multirate models, we solve the feasibility problem for finding a valid mapping. We study the scalability and efficiency of our approach with synthetic benchmarks and an engine controller from Toyota.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "15", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", remark = "Special issue on VIPES, special issue on ICESS2015 and regular papers.", } @Article{Fusella:2016:CAA, author = "Edoardo Fusella and Alessandro Cilardo", title = "Crosstalk-Aware Automated Mapping for Optical Networks-on-Chip", journal = j-TECS, volume = "16", number = "1", pages = "16:1--16:??", month = nov, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2930666", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Nov 3 16:48:38 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Optical networks-on-chip (NoCs) provide a promising answer to address the increasing requirements of ultra-high bandwidth and extremely low power consumption. Designing a photonic interconnect, however, involves a number of challenges that have no equivalent in the electronic domain, particularly the crosstalk noise, which affects the signal-to-noise ratio (SNR) possibly resulting in an inoperable architecture and hence constraining the network scalability. In this article, we point out the implications of application-driven task mapping on crosstalk effects. We motivate the main rationale of our work and provide a formalization of the problem. 
Then we propose a class of algorithms that automatically map the application tasks onto a generic mesh-based photonic NoC architecture such that the worst-case crosstalk is minimized. We also present a purpose-built experimental setup used for evaluating several architectural solutions in terms of crosstalk noise and SNR. The setup is used to collect extensive results from several real-world applications and case studies. The collected results show that the crosstalk noise can be significantly reduced by adopting our approach, thereby allowing higher network scalability, and can exhibit encouraging improvements over application-oblivious architectures.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "16", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", remark = "Special issue on VIPES, special issue on ICESS2015 and regular papers.", } @Article{Iida:2016:GET, author = "Yuki Iida and Yusuke Fujii and Takuya Azumi and Nobuhiko Nishio and Shinpei Kato", title = "{GPUrpc}: Exploring Transparent Access to Remote {GPUs}", journal = j-TECS, volume = "16", number = "1", pages = "17:1--17:??", month = nov, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2950056", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Nov 3 16:48:38 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Graphics processing units (GPUs) are increasingly used for high-performance computing. Programming frameworks for general-purpose computing on GPUs (GPGPU), such as CUDA and OpenCL, are also maturing. Driving this trend is the recent proliferation of mobile devices such as smartphones and wearable computers. These devices are increasingly incorporating computationally intensive applications that involve some form of environmental recognition such as augmented reality (AR) or voice recognition. 
However, devices with low computational power cannot satisfy such demanding computing requirements. The CPU load of these devices could be reduced by offloading computation onto GPUs on the cloud. This paper presents GPUrpc, a remote procedure call (RPC) extension to Gdev, which is a rich set of runtime libraries and device drivers for achieving first-class GPU resource management. GPUrpc allows developers to use CUDA for GPGPU development work. Existing research uses RPCs based on the CUDA application programming interfaces (APIs); hence, all CUDA APIs require communication. To reduce communication overhead, we use an RPC based on a low-level API than CUDA API and reduced API that does not require communication. Our evaluation conducted on Linux and NVIDIA GPUs shows that the basic performance of our prototype implementation is reliable in comparison with the existing method. Evaluation using the Rodinia benchmark suite designed for research in heterogeneous parallel computing showed that GPUrpc is effective for applications such as image processing and data mining. GPUrpc also can improve power consumption to approximately 1/6 that of CPU processing for performing $ 512 \times 512 $ matrix multiplication.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "17", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", remark = "Special issue on VIPES, special issue on ICESS2015 and regular papers.", } @Article{Wang:2016:GTB, author = "Kun Wang and Miao Du and Dejun Yang and Chunsheng Zhu and Jian Shen and Yan Zhang", title = "Game-Theory-Based Active Defense for Intrusion Detection in Cyber-Physical Embedded Systems", journal = j-TECS, volume = "16", number = "1", pages = "18:1--18:??", month = nov, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2886100", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Nov 3 16:48:38 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Cyber-Physical Embedded Systems (CPESs) are distributed embedded systems integrated with various actuators and sensors. When it comes to the issue of CPES security, the most significant problem is the security of Embedded Sensor Networks (ESNs). With the continuous growth of ESNs, the security of transferring data from sensors to their destinations has become an important research area. Due to the limitations in power, storage, and processing capabilities, existing security mechanisms for wired or wireless networks cannot apply directly to ESNs. Meanwhile, ESNs are likely to be attacked by different kinds of attacks in industrial scenarios. Therefore, there is a need to develop new techniques or modify the current security mechanisms to overcome these problems. In this article, we focus on Intrusion Detection (ID) techniques and propose a new attack-defense game model to detect malicious nodes using a repeated game approach. As a direct consequence of the game model, attackers and defenders make different strategies to achieve optimal payoffs. 
Importantly, error detection and missing detection are taken into consideration in Intrusion Detection Systems (IDSs), where a game tree model is introduced to solve this problem. In addition, we analyze and prove the existence of pure Nash equilibrium and mixed Nash equilibrium. Simulations show that the proposed model can both reduce energy consumption by up to 50\% compared with the existing All Monitor (AM) model and improve the detection rate by up to 10\% to 15\% compared with the existing Cluster Head (CH) monitor model.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "18", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", remark = "Special issue on VIPES, special issue on ICESS2015 and regular papers.", } @Article{Tan:2016:SSH, author = "Song Tan and Wen-Zhan Song and Steve Yothment and Junjie Yang and Lang Tong", title = "{ScorePlus}: a Software-Hardware Hybrid and Federated Experiment Environment for Smart Grid", journal = j-TECS, volume = "16", number = "1", pages = "19:1--19:??", month = nov, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2964200", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Nov 3 16:48:38 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We present ScorePlus, a software-hardware hybrid and federated experiment environment for Smart Grid. 
ScorePlus incorporates both a software emulator and hardware testbed, such that they all follow the same architecture, and the same Smart Grid application program can be tested on either of them without any modification; ScorePlus provides a federated environment such that multiple software emulators and hardware testbeds at different locations are able to connect and form a unified Smart Grid system; ScorePlus software is encapsulated as a resource plugin in the OpenStack cloud computing platform, such that it supports massive deployments with large-scale test cases in cloud infrastructure.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "19", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", remark = "Special issue on VIPES, special issue on ICESS2015 and regular papers.", } @Article{Majmudar:2016:AOR, author = "Charvi A. Majmudar and Bashir I. Morshed", title = "Autonomous {OA} Removal in Real-Time from Single Channel {EEG} Data on a Wearable Device Using a Hybrid Algebraic-Wavelet Algorithm", journal = j-TECS, volume = "16", number = "1", pages = "20:1--20:??", month = nov, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2983629", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Nov 3 16:48:38 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Electroencephalography (EEG) is a non-invasive technique to record brain activities in natural settings. Ocular Artifacts (OA) usually contaminate EEG signals, removal of which is critical for accurate feature extraction and classification. With the increasing adoption of wearable technologies, single-channel real-time EEG systems that often require real-time signal processing for immediate real-time feedback are becoming more prevalent.
However, traditional OA removal algorithms usually require multiple channels of EEG data, are computationally expensive, and do not perform well in real-time. In this article, a new hybrid algorithm is proposed that autonomously detects OA and subsequently removes OA from a single-channel streaming EEG data in real-time. The proposed single EEG channel algorithm also does not require additional reference electrooculography (EOG) channel. The algorithm has also been implemented on an embedded hardware platform of single channel wearable EEG system (NeuroMonitor). The algorithm first detects the OA zones using an Algebraic approach and then removes these artifacts from the detected OA zones using the Discrete Wavelet Transform (DWT) decomposition method. The de-noising technique is applied only to the OA zone, which minimizes loss of neural information outside the OA zone. A qualitative and quantitative performance evaluation was carried out with a 0.5s epoch in overlapping sliding window technique using time-frequency analysis, mean square coherence, and correlation coefficient statistics. The hybrid OA removal algorithm demonstrated real-time operation with 3s latency on the PSoC-3-microcontroller-based EEG system. Successful implementation of OA removal from single-channel real-time EEG data using the proposed algorithm shows promise for real-time feedback applications of wearable EEG devices.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "20", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", remark = "Special issue on VIPES, special issue on ICESS2015 and regular papers.", } @Article{Kuan:2016:SEI, author = "Yuan-Hung Kuan and Yuan-Hao Chang and Tseng-Yi Chen and Po-Chun Huang and Kam-Yiu Lam", title = "Space-Efficient Index Scheme for {PCM}-Based Multiversion Databases in Cyber-Physical Systems", journal = j-TECS, volume = "16", number = "1", pages = "21:1--21:??", month = nov, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2950060", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Nov 3 16:48:38 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this article, we study the indexing problem of using PCM as the storage medium for embedded multiversion databases in cyber-physical systems (CPSs). Although the multiversion B$^+$-tree (MVBT) index has been shown to be efficient in managing multiple versions of data items in a database, MVBT is designed for databases residing in traditional block-oriented storage devices. It can have serious performance problems when the databases are on phase-change memory (PCM). Since the embedded multiversion database in CPSs may have limited storage space and are update intensive, to resolve the problems of MVBT of lack of space efficiency and heavy update cost, we propose a new index scheme, called space-efficient multiversion index (SEMI), to enhance the space utilization and access performance in serving various types of queries. In SEMI, since the number of keys in the database may be small, instead of using a B-tree index, we propose to use a binary-search tree to organize the index keys.
Furthermore, multiple versions of the same data item may be stored consecutively and indexed by a single entry to maximize the space utilization and at the same time to enhance the performance in serving version-range queries. Analytical studies have been conducted on SEMI, and a series of experiments have been performed to evaluate its performance as compared with MVBT under different workloads. The experimental results have demonstrated that SEMI can achieve very high space utilization and has better performance in serving update transactions and range queries as compared with MVBT.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "21", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", remark = "Special issue on VIPES, special issue on ICESS2015 and regular papers.", } @Article{Kartal:2016:MDR, author = "Yusuf Bora Kartal and Ece G{\"u}ran Schmidt and Klaus Werner Schmidt", title = "Modeling Distributed Real-Time Systems in {TIOA} and {UPPAAL}", journal = j-TECS, volume = "16", number = "1", pages = "22:1--22:??", month = nov, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2964202", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Nov 3 16:48:38 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The mission- and life-critical properties of distributed real-time systems require concurrent modeling, analysis, and formal verification in the design stage. The timed input/output automata (TIOA) framework and the UPPAAL software package are two widely used modeling and verification tools for this purpose. To this end, we develop the algorithm TUConvert for converting distributed TIOA models to UPPAAL behavioral models and formally prove its correctness. 
We demonstrate the applicability of our algorithm by the formal verification of a distributed real-time industrial communication protocol that is modeled by TIOA.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "22", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", remark = "Special issue on VIPES, special issue on ICESS2015 and regular papers.", } @Article{Asyaban:2016:ASB, author = "Sedigheh Asyaban and Mehdi Kargahi and Lothar Thiele and Morteza Mohaqeqi", title = "Analysis and Scheduling of a Battery-Less Mixed-Criticality System with Energy Uncertainty", journal = j-TECS, volume = "16", number = "1", pages = "23:1--23:??", month = nov, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2964201", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Nov 3 16:48:38 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We consider a battery-less real-time embedded system equipped with an energy harvester. It scavenges energy from an environmental resource according to some stochastic patterns. The success of jobs is threatened in the case of energy shortage, which might be due to lack of harvested energy, losses originated from the super-capacitor self-discharge, as well as power consumption of executed tasks. The periodic real-time tasks of the system follow a dual-criticality model. In addition, each task has a minimum required success ratio that needs to be satisfied in steady state. We analytically evaluate the behavior of such a system in terms of its energy-related success ratio for a given schedule. Based on these results, we propose a scheduling algorithm that satisfies both temporal and success-ratio constraints of the jobs, while respecting task criticalities and corresponding system modes. 
The accuracy of the analytical method as well as its dependence on the numerical computations and other model assumptions are extensively discussed through comparison with simulation results. Also, the efficacy of the proposed scheduling algorithm is studied through comparison to some existing non-mixed- and mixed-criticality scheduling algorithms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "23", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", remark = "Special issue on VIPES, special issue on ICESS2015 and regular papers.", } @Article{Manna:2016:ITS, author = "Kanchan Manna and Shivam Swami and Santanu Chattopadhyay and Indranil Sengupta", title = "Integrated Through-Silicon Via Placement and Application Mapping for {$3$D} Mesh-Based {NoC} Design", journal = j-TECS, volume = "16", number = "1", pages = "24:1--24:??", month = nov, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2968446", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Nov 3 16:48:38 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article proposes a solution to the integrated problem of Through-Silicon Via (TSV) placement and mapping of cores to the routers in a three-dimensional mesh-based Network-on-Chip (NoC) system. TSV geometry restricts their number in three-dimensional (3D) ICs. As a result, only about 25\% of routers in a 3D NoC can possess vertical connections. Mapping plays an important role in evolving good system solutions in such a situation. TSVs have been placed with detailed consultation with the application mapping process. The integrated problem was first solved using the exact method of Integer Linear Programming (ILP). Next, a solution was obtained via a Particle Swarm Optimization (PSO) formulation.
Several augmentations to the basic PSO strategy have been proposed to generate good-quality solutions. The results obtained are better than many of the contemporary approaches and close to the theoretical situation in which all routers are 3D in nature.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "24", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", remark = "Special issue on VIPES, special issue on ICESS2015 and regular papers.", } @Article{Tajik:2016:SRS, author = "Hossein Tajik and Bryan Donyanavard and Nikil Dutt and Janmartin Jahn and J{\"o}rg Henkel", title = "{SPMPool}: Runtime {SPM} Management for Memory-Intensive Applications in Embedded Many-Cores", journal = j-TECS, volume = "16", number = "1", pages = "25:1--25:??", month = nov, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2968447", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Nov 3 16:48:38 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Distributed Scratchpad Memories (SPMs) in embedded many-core systems require careful selection of data placement to achieve good performance. Applications mapped to these platforms have varying memory requirements based on their runtime behavior, resulting in under- or overutilization of the local SPMs. We propose SPMPool to share the available on-chip SPMs on many-cores among concurrently executing applications in order to reduce the overall memory access latency. By pooling SPM resources, we can assign underutilized memory resources, due to idle cores or low memory usage, to applications dynamically. SPMPool is the first workload-aware SPM mapping solution for many-cores that dynamically allocates data at runtime---using profiled data---to address the unpredictable set of concurrently executing applications.
Our experiments on workloads with varying interapplication memory intensity show that SPMPool can achieve up to 76\% reduction in memory access latency for configurations ranging from 16 to 256 cores, compared to the traditional approach that limits executing cores to use their local SPMs.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "25", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", remark = "Special issue on VIPES, special issue on ICESS2015 and regular papers.", } @Article{Schurmans:2016:FAE, author = "Stefan Sch{\"u}rmans and Gereon Onnebrink and Rainer Leupers and Gerd Ascheid and Xiaotao Chen", title = "Frequency-Aware {ESL} Power Estimation for {ARM Cortex-A9} Using a Black Box Processor Model", journal = j-TECS, volume = "16", number = "1", pages = "26:1--26:??", month = nov, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2987375", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Nov 3 16:48:38 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Power estimation has become a strongly desired feature in Electronic System Level (ESL) simulations. Most existing power estimation approaches for this abstraction level require component models with observable internals. However, most ESL models of modern processors are delivered as black box components. This work presents a tool-based ESL power estimation methodology for black box models and its extension for multiple clock frequencies. The evaluation uses hardware measurements of the ARM Cortex-A9 subsystem of the OMAP4460 chip for reference. The achieved estimation error is 5\% on average for fixed-frequency power models and 7\% for multifrequency power models.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "26", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", remark = "Special issue on VIPES, special issue on ICESS2015 and regular papers.", } @Article{Weinstock:2016:PSS, author = "Jan Henrik Weinstock and Luis Gabriel Murillo and Rainer Leupers and Gerd Ascheid", title = "Parallel {SystemC} Simulation for {ESL} Design", journal = j-TECS, volume = "16", number = "1", pages = "27:1--27:??", month = nov, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2987374", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Nov 3 16:48:38 MDT 2016", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Virtual platforms have become essential tools for the design of embedded systems. Developers rely on them for design space exploration and software debugging. However, with rising HW/SW complexity and the need to simulate more and more processors simultaneously, the performance of virtual platforms degrades rapidly. Parallel simulation techniques can help to counter this by leveraging multicore PCs, which are widely available today. This work presents a novel parallel simulation approach that is targeted toward acceleration of virtual platforms from the ESL domain. By trading some timing accuracy, multiprocessor virtual platforms can be accelerated by up to $ 3.4 \times $ on regular quad-core workstations.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "27", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", remark = "Special issue on VIPES, special issue on ICESS2015 and regular papers.", } @Article{Shukla:2017:ECC, author = "Sandeep K. 
Shukla", title = "Editorial: Continuing the Course", journal = j-TECS, volume = "16", number = "2", pages = "28:1--28:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3043965", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "28", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Fischmeister:2017:GES, author = "Sebastian Fischmeister and Jason Xue", title = "Guest Editorial: Special Issue on {LCTES 2015}", journal = j-TECS, volume = "16", number = "2", pages = "29:1--29:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3041038", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "29", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Cunha:2017:DSC, author = "Marcos Aur{\'e}lio Pinto Cunha and Omayma Matoussi and Fr{\'e}d{\'e}ric P{\'e}trot", title = "Detecting Software Cache Coherence Violations in {MPSoC} Using Traces Captured on Virtual Platforms", journal = j-TECS, volume = "16", number = "2", pages = "30:1--30:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2990193", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Software cache coherence schemes tend to be the solution of choice in dedicated multi/many core systems on chip, as they make the hardware much simpler and predictable. However, despite the developers' effort, it is hard to make sure that all preventive measurements are taken to ensure coherence. In this work, we propose a method to identify the potential cache coherence violations using traces obtained from virtual platforms. These traces contain causality relations among events, which allow first to simplify the analysis, and second to avoid relying on timestamps. Our method identifies potential violations that may occur during a given execution for write-through and write-back cache policies. Therefore, it is independent of the software coherence protocol. We conducted experiments on parallel applications running on a lightweight SMP operating system, and we were able to detect coherence issues that we could then solve.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "30", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zheng:2017:DDC, author = "Wenguang Zheng and Hui Wu", title = "Dynamic Data-Cache Locking for Minimizing the {WCET} of a Single Task", journal = j-TECS, volume = "16", number = "2", pages = "31:1--31:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2994602", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Caches have been widely used in modern embedded processors to bridge the increasing speed gap between processors and off-chip memory. In real-time embedded systems, computing the Worst-Case Execution Time (WCET) of a task is essential for the task scheduler to construct a valid schedule for a task set. Unfortunately, caches make it much harder to compute the WCET of a task. Cache locking has been proposed to alleviate the timing unpredictability problem caused by caches. In this article, we investigate the following WCET-aware data-cache locking problem for a single task. Given a task, select a set of variables as locked cache contents such that the WCET of the task is minimized. We propose two dynamic full cache-locking approaches. The first formulates the problem as a global Integer Linear Programming (ILP) problem that simultaneously selects a minimum set of memory blocks of variables as locked cache contents and allocates them to the data cache. The second iteratively constructs a subgraph of the Control Flow Graph (CFG) of the task in which the lengths of all the paths are close to the longest path length, uses an ILP formulation to select a minimum set of memory blocks of variables in the subgraph as locked cache contents, and allocates the selected memory blocks to the data cache. 
We also propose two novel, efficient data-cache allocation algorithms for the global ILP approach and the iterative ILP approach, respectively. We have implemented both approaches and compared them with two state-of-the-art approaches, the longest path-based dynamic cache-locking approach and the static WCET analysis approach without cache locking by using a set of benchmarks from the M{\"a}lardalen WCET benchmark suite, SNU real-time benchmarks, and Powerstone benchmarks. Compared to the static WCET analysis approach, the average WCET improvements of the first approach range between 11.4\% and 26.4\%. Compared to the longest path--based, dynamic cache-locking approach, the average WCET improvements of the first approach range between 5.0\% and 15.4\%. The second approach performs slightly better than the first approach.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "31", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Liu:2017:CDS, author = "Qingrui Liu and Changhee Jung and Dongyoon Lee and Devesh Tiwari", title = "Compiler-Directed Soft Error Detection and Recovery to Avoid {DUE} and {SDC} via {Tail-DMR}", journal = j-TECS, volume = "16", number = "2", pages = "32:1--32:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2930667", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article presents Clover, a compiler-directed soft error detection and recovery scheme for lightweight soft error resilience. The compiler carefully generates soft-error-tolerant code based on idempotent processing without explicit checkpoints. 
During program execution, Clover relies on a small number of acoustic wave detectors deployed in the processor to identify soft errors by sensing the wave made by a particle strike. To cope with DUEs (detected unrecoverable errors) caused by the sensing latency of error detection, Clover leverages a novel selective instruction duplication technique called tail-DMR (dual modular redundancy) that provides a region-level error containment. Once a soft error is detected by either the sensors or the tail-DMR, Clover takes care of the error as in the case of exception handling. To recover from the error, Clover simply redirects program control to the beginning of the code region where the error is detected. The experimental results demonstrate that the average runtime overhead is only 26\%, which is a 75\% reduction compared to that of the state-of-the-art soft error resilience technique. In addition, this article evaluates an alternative technique called tail-wait, comparing it to Clover. According to the evaluation with the different processor configurations and the various error detection latencies, Clover turns out to be a superior technique, achieving 1.06 to 3.49 $ \times $ speedup over the tail-wait.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "32", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Procter:2017:PAS, author = "Adam Procter and William L. 
Harrison and Ian Graves and Michela Becchi and Gerard Allwein", title = "A Principled Approach to Secure Multi-core Processor Design with {ReWire}", journal = j-TECS, volume = "16", number = "2", pages = "33:1--33:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2967497", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "There is no such thing as high assurance without high assurance hardware. High assurance hardware is essential because any and all high assurance systems ultimately depend on hardware that conforms to, and does not undermine, critical system properties and invariants. And yet, high assurance hardware development is stymied by the conceptual gap between formal methods and hardware description languages used by engineers. This article advocates a semantics-directed approach to bridge this conceptual gap. We present a case study in the design of secure processors, which are formally derived via principled techniques grounded in functional programming and equational reasoning. The case study comprises the development of secure single- and dual-core variants of a single processor, both based on a common semantic specification of the ISA. We demonstrate via formal equational reasoning that the dual-core processor respects a ``no-write-down'' information flow policy. The semantics-directed approach enables a modular and extensible style of system design and verification. The secure processors require only a very small amount of additional code to specify and implement, and their security verification arguments are concise and readable. Our approach rests critically on ReWire, a functional programming language providing a suitable foundation for formal verification of hardware designs. 
This case study demonstrates both ReWire's expressiveness as a programming language and its power as a framework for formal, high-level reasoning about hardware systems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "33", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chang:2017:ESS, author = "Li-Pin Chang and Po-Han Sung and Po-Tsang Chen and Po-Hung Chen", title = "Eager Synching: a Selective Logging Strategy for Fast {\tt fsync()} on Flash-Based {Android} Devices", journal = j-TECS, volume = "16", number = "2", pages = "34:1--34:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2930668", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Flash storage has been a standard component in Android devices. Recent research has reported that application data management in Android involves frequent fsync() operations. The current fsync() implementations, including those of ext4 and F2FS, have several common drawbacks. Specifically, ext4 commits a transaction every time to sync a file, whereas F2FS commits a checkpoint to sync a directory. Committing a transaction or checkpoint flushes all dirty data from the page cache to the flash storage via many small, random block write requests. The resultant high I/O frequency and excessive write traffic cause a high fsync() latency. This study presents an efficient fsync() method, called eager synching, which is based on a simple idea: write less, and write sequentially. To sync a file, eager synching writes only a subset of all dirty data in the page cache to a sequential log space using a few sequential block write requests. It does not involve transaction or checkpoint committing. 
We successfully implemented eager synching in ext4 and F2FS, and our experimental results show that, compared with the original fsync() methods of ext4 and F2FS, eager synching reduced the average and maximum fsync() latencies by up to 72\% and 91\%, respectively, block-level write traffic by up to 35\%, and I/O frequency by up to 66\%. Through enhanced crash recovery procedures, eager synching can successfully recover all previously synched files while still guaranteeing the file system integrity. We also conducted live application replays using the proposed eager synching approach and observed that this approach significantly improved the application frame updating rate and application execution time.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "34", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Dietrich:2017:GOF, author = "Christian Dietrich and Martin Hoffmann and Daniel Lohmann", title = "Global Optimization of Fixed-Priority Real-Time Systems by {RTOS}-Aware Control-Flow Analysis", journal = j-TECS, volume = "16", number = "2", pages = "35:1--35:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2950053", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Cyber--physical systems typically target a dedicated purpose; their embedded real-time control system, such as an automotive control unit, is designed with a well-defined set of functionalities. On the software side, this results in a large amount of implicit and explicit static knowledge about the system and its behavior already at compile time. Compilers have become increasingly better at extracting and exploiting such static knowledge. 
For instance, many optimizations have been lifted up to the interprocedural or even to the whole-program level. However, whole-program optimizations generally stop at the application--kernel boundary: control-flow transitions between different threads are not yet analyzed. In this article, we cross the application--kernel boundary by combining the semantics of a real-time operating system (RTOS) with deterministic fixed-priority scheduling (e.g., OSEK/AUTOSAR, ARINC 653, $ \mu $ITRON, POSIX.4) and the explicit application knowledge to enable system-wide, flow-sensitive compiler optimizations. We present two methods to extract a cross-kernel, control-flow graph that provides a global view on all possible execution paths of a real-time system. Having this knowledge at hand, we tailor the operating system kernel more closely to the particular application scenario. For the example of a real-world safety-critical control system, we present three possible use cases. (1) Runtime optimizations, by means of specialized system calls for each call site, allow one to speed up the kernel execution path by 28\% in our benchmark scenario. Furthermore, we target transient hardware fault tolerance with two automated software-based countermeasures: (2) generation of OS state assertions on the expected system behavior, and (3) a system-wide dominator-region based control-flow error detection, both of which leverage significant robustness improvements.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "35", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Liu:2017:MCS, author = "Jing Liu and Kenli Li and Dakai Zhu and Jianjun Han and Keqin Li", title = "Minimizing Cost of Scheduling Tasks on Heterogeneous Multicore Embedded Systems", journal = j-TECS, volume = "16", number = "2", pages = "36:1--36:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2935749", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Cost savings are very critical in modern heterogeneous computing systems, especially in embedded systems. Task scheduling plays an important role in cost savings. In this article, we tackle the problem of scheduling tasks on heterogeneous multicore embedded systems with the constraints of time and resources for minimizing the total cost, while considering the communication overhead. This problem is NP-hard and we propose several heuristic techniques---ISGG, RLD, and RLDG---to address the problem. Experimental results show that the proposed algorithms significantly outperform the existing approaches in terms of cost savings.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "36", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Llopard:2017:FPA, author = "Ivan Llopard and Christian Fabre and Albert Cohen", title = "From a Formalized Parallel Action Language to Its Efficient Code Generation", journal = j-TECS, volume = "16", number = "2", pages = "37:1--37:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2990195", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Modeling languages propose convenient abstractions and transformations to handle the complexity of today's embedded systems. Based on the formalism of the Hierarchical State Machine, they enable the expression of hierarchical control parallelism. However, they face two important challenges when it comes to modeling data-intensive applications: no unified approach that also accounts for data-parallel actions and no effective code optimization and generation flows. We propose a modeling language extended with parallel action semantics and hierarchical indexed-state machines suitable for computationally intensive applications. Together with its formal semantics, we present an optimizing model compiler aiming for the generation of efficient data-parallel implementations.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "37", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Manilov:2017:FRS, author = "Stanislav Manilov and Bj{\"o}rn Franke and Anthony Magrath and Cedric Andrieu", title = "{Free Rider}: a Source-Level Transformation Tool for Retargeting Platform-Specific Intrinsic Functions", journal = j-TECS, volume = "16", number = "2", pages = "38:1--38:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2990194", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Short-vector SIMD and DSP instructions are popular extensions to common ISAs. These extensions deliver excellent performance and compact code for some compute-intensive applications, but they require specialized compiler support. To enable the programmer to explicitly request the use of such an instruction, many C compilers provide platform-specific intrinsic functions, whose implementation is handled specially by the compiler. The use of such intrinsics, however, inevitably results in nonportable code. In this article, we develop a novel methodology for retargeting such nonportable code, which maps intrinsics from one platform to another, taking advantage of similar intrinsics on the target platform. We employ a description language to specify the signature and semantics of intrinsics and perform graph-based pattern matching and high-level code transformations to derive optimized implementations exploiting the target's intrinsics, wherever possible.
We demonstrate the effectiveness of our new methodology, implemented in the Free Rider tool, by automatically retargeting benchmarks derived from OpenCV samples and a complex embedded application optimized to run on an ARM Cortex-M4 to an Intel Edison module with SSE4.2 instructions (and vice versa). We achieve a speedup of up to 3.73 over a plain C baseline, and on average 96.0\% of the speedup of manually ported and optimized versions of the benchmarks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "38", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zeng:2017:SLD, author = "Jing Zeng and Laurence T. Yang and Man Lin and Zili Shao and Dakai Zhu", title = "System-Level Design Optimization for Security-Critical Cyber-Physical-Social Systems", journal = j-TECS, volume = "16", number = "2", pages = "39:1--39:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2925991", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Cyber-physical-social systems (CPSS), an emerging computing paradigm, have attracted intensive attentions from the research community and industry. We are facing various challenges in designing secure, reliable, and user-satisfied CPSS. In this article, we consider these design issues as a whole and propose a system-level design optimization framework for CPSS design where energy consumption, security-level, and user satisfaction requirements can be fulfilled while satisfying constraints for system reliability. Specifically, we model the constraints (energy efficiency, security, and reliability) as the penalty functions to be incorporated into the corresponding objective functions for the optimization problem. 
A smart office application is presented to demonstrate the feasibility and effectiveness of our proposed design optimization approach.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "39", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Haar:2017:MGE, author = "Stefan Haar and Roland Meyer", title = "Message from the {Guest Editors}", journal = j-TECS, volume = "16", number = "2", pages = "40:1--40:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3037413", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "40", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Bujtor:2017:TPD, author = "Ferenc Bujtor and Lev Sorokin and Walter Vogler", title = "Testing Preorders for {dMTS}: Deadlock- and the New {Deadlock-\slash Divergence Testing}", journal = j-TECS, volume = "16", number = "2", pages = "41:1--41:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2984641", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Testing preorders on component specifications ensure that replacing a specification by a refined one does not introduce unwanted behavior in an overall system. Considering deadlocks as unwanted, the preorder can be characterized by a failure semantics on Labeled Transition Systems (LTSs). In previous work, we have generalized this to Modal Transition Systems (MTSs) with a new, MTS-specific testing idea. 
In the present article, we generalize this idea further to dMTS, a subclass of disjunctive MTSs. On the one hand, the testing preorder can be characterized by the same failure semantics, and dMTS have no additional expressivity in our setting. On the other hand, the technical treatment is significantly harder and, surprisingly, the preorder is not compositional. Furthermore, we regard deadlocks and divergence (infinite unobservable runs) as unwanted and characterize the testing preorder with an unusual failure-divergence semantics. This preorder is already on LTSs strictly coarser---and hence arguably better---than the traditional failure-divergence preorder. It is a precongruence on dMTS, also for hiding, and much easier to handle than the deadlock-based preorder. It arises as well from a new variant of De Nicola's and Hennessy's must-testing.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "41", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Vijzelaar:2017:MVS, author = "Stefan Vijzelaar and Wan Fokkink", title = "Multi-valued Simulation and Abstraction Using Lattice Operations", journal = j-TECS, volume = "16", number = "2", pages = "42:1--42:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3012282", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Abstractions can cause spurious results, which need to be verified in the concrete system to gain conclusive results. Verification based on a multi-valued logic can distinguish between conclusive and inconclusive results, provides increased precision, and allows for encoding additional information into the model. To ensure a correct abstraction, one can use a mixed simulation [Meller et al. 2009]. 
We extend mixed simulation to include inconsistent values, thereby resolving an asymmetry and allowing for abstractions with increased precision when inconsistent values are available. In addition, we present a set of abstraction rules, compatible with the extended notion, for constructing abstract models.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "42", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Andre:2017:PPO, author = "{\'E}tienne Andr{\'e} and Thomas Chatain and C{\'e}sar Rodr{\'\i}guez", title = "Preserving Partial-Order Runs in Parametric Time {Petri} Nets", journal = j-TECS, volume = "16", number = "2", pages = "43:1--43:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3012283", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Parameter synthesis for timed systems aims at deriving parameter valuations satisfying a given property. In this article, we target concurrent systems. We use partial-order semantics for parametric time Petri nets as a way to both cope with the well-known state-space explosion due to concurrency and significantly enhance the result of an existing synthesis algorithm. Given a reference parameter valuation, our approach synthesizes other valuations preserving the partial-order executions of the reference parameter valuation. We show the applicability of our approach using a tool applied to asynchronous circuits.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "43", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Berard:2017:NIP, author = "B{\'e}atrice B{\'e}rard and Lo{\"\i}c H{\'e}lou{\"e}t and John Mullins", title = "Non-interference in Partial Order Models", journal = j-TECS, volume = "16", number = "2", pages = "44:1--44:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2984639", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Non-interference (NI) is a property of systems stating that confidential actions should not cause effects observable by unauthorized users. Several variants of NI have been studied for many types of models but rarely for true concurrency or unbounded models. This work investigates NI for High-level Message Sequence Charts (HMSCs), a scenario language for the description of distributed systems, based on composition of partial orders. We first propose a general definition of security properties in terms of equivalence among observations of behaviors. Observations are naturally captured by partial order automata, a formalism that generalizes HMSCs and permits assembling partial orders. We show that equivalence or inclusion properties for HMSCs (and hence for partial order automata) are undecidable, which means in particular that NI is undecidable for HMSCs. We hence consider decidable subclasses of partial order automata and HMSCs. Finally, we define weaker local properties, describing situations where a system is attacked by a single agent, and show that local NI is decidable. We then refine local NI to a finer notion of causal NI that emphasizes causal dependencies between confidential actions and observations and extend it to causal NI with (selective) declassification of confidential events. 
Checking whether a system satisfies local and causal NI and their declassified variants are PSPACE-complete problems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "44", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Saarikivi:2017:MTS, author = "Olli Saarikivi and Hern{\'a}n Ponce-De-Le{\'o}n and Kari K{\"a}hk{\"o}nen and Keijo Heljanko and Javier Esparza", title = "Minimizing Test Suites with Unfoldings of Multithreaded Programs", journal = j-TECS, volume = "16", number = "2", pages = "45:1--45:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3012281", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article focuses on computing minimal test suites for multithreaded programs. Based on previous work on test case generation for multithreaded programs using unfoldings, this article shows how this unfolding can be used to generate minimal test suites covering all local states of the program. Generating such minimal test suites is shown to be NP-complete in the size of the unfolding. We propose an SMT encoding for this problem and two methods based on heuristics which only approximate the solution, but scale better in practice. Finally, we apply our methods to compute the minimal test suites for several benchmarks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "45", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Valmari:2017:SIS, author = "Antti Valmari", title = "Stop It, and Be Stubborn!", journal = j-TECS, volume = "16", number = "2", pages = "46:1--46:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3012279", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This publication discusses how automatic verification of concurrent systems can be made more efficient by focusing on always may-terminating systems. First, making a system always may-terminating is a method for meeting a modelling need that exists independently of this publication. It is illustrated that without doing so, non-progress errors may be lost. Second, state explosion is often alleviated with stubborn, ample, and persistent set methods. They use expensive cycle or terminal strong component conditions in many cases. It is proven that for many important classes of properties, if the systems are always may-terminating, then these conditions can be left out.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "46", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Isenberg:2017:IIV, author = "Tobias Isenberg", title = "Incremental Inductive Verification of Parameterized Timed Systems", journal = j-TECS, volume = "16", number = "2", pages = "47:1--47:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2984640", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We propose and extend an approach for the verification of safety properties for parameterized timed systems modeled as networks of timed automata. For this task, we introduce an incremental workflow that is based on our algorithm IC3 with Zones. It proceeds in a cycle in which single models of the system are verified, and the verification results are employed for the reasoning about the entire system. Starting with the smallest instances, the verification of the safety property is carried out fast and efficient. On successful verification, the algorithm produces an inductive strengthening of the safety property. We reuse this result and try to reason about the entire parameterized timed system. To this end, we extrapolate the inductive strengthening into a candidate for the next-larger model. In case this candidate is a valid inductive strengthening for the next larger model, our main theorem reasons about all models of the parameterized timed system, stating that the safety property holds true for all models. Otherwise, the main cycle starts over with the verification of the next larger model. This workflow is iterated indefinitely, until able to reason about the entire parameterized timed system, until a counterexample trace is found, or until the single models become too large to be handled in the verification. 
We reuse the intermediate results in a Feedback-loop in order to accelerate the verification runs for the single models. Furthermore, we consider an extended formalism in comparison to our previous publications.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "47", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Siirtola:2017:WDW, author = "Antti Siirtola and Stavros Tripakis and Keijo Heljanko", title = "When Do We Not Need Complex Assume-Guarantee Rules?", journal = j-TECS, volume = "16", number = "2", pages = "48:1--48:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3012280", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We study the need for complex circular assume-guarantee (AG) rules in formalisms that already provide the simple precongruence rule. We first investigate the question for two popular formalisms: Labeled Transition Systems (LTSs) with weak simulation and Interface Automata (IA) with alternating simulation. We observe that, in LTSs, complex circular AG rules cannot always be avoided, but, in the IA world, the simple precongruence rule is all we need. Based on these findings, we introduce modal IA with cut states, a novel formalism that not only generalizes IA and LTSs but also allows for compositional reasoning without complex AG rules.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "48", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Tang:2017:TFC, author = "Qi Tang and Twan Basten and Marc Geilen and Sander Stuijk and Ji-Bo Wei", title = "{Task-FIFO} Co-Scheduling of Streaming Applications on {MPSoCs} with Predictable Memory Hierarchy", journal = j-TECS, volume = "16", number = "2", pages = "49:1--49:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3038484", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article studies the scheduling of real-time streaming applications on multiprocessor systems-on-chips with predictable memory hierarchy. An iteration-based task-FIFO co-scheduling framework is proposed for this problem. We obtain FIFO size distributions using Pareto space searching, based on which the task-to-processor mapping is obtained with the potential FIFO allocation being taken into account; then, the FIFO-to-memory allocation is optimized to minimize the total memory access cost; finally, a self-timed throughput analysis method that considers memory and direct memory access controller contention is utilized to analyze the throughput. Our methods are validated by a set of synthesized and practical applications on different platforms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "49", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Choo:2017:EDF, author = "Kim-Kwang Raymond Choo and Yunsi Fei and Yang Xiang and Yu Yu", title = "Embedded Device Forensics and Security", journal = j-TECS, volume = "16", number = "2", pages = "50:1--50:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3015662", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "While the increasing digitalization of our society and amalgamation of embedded devices into the ever-increasing facets of our daily life (e.g., in smart and intelligent vehicles, smart cities and smart nations, and critical infrastructure sectors) have resulted in improved productivity and quality of life, the trend has also resulted in a trend of increasing frequency and sophistication of cyber exploitation and cyber threats. Hence, there is a need for coordinated efforts from the research community to address resulting concerns using both cryptographic and non-cryptographic solutions, such as those presented in this special section.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "50", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Puthal:2017:DDK, author = "Deepak Puthal and Surya Nepal and Rajiv Ranjan and Jinjun Chen", title = "{DLSeF}: a Dynamic Key-Length-Based Efficient Real-Time Security Verification Model for Big Data Stream", journal = j-TECS, volume = "16", number = "2", pages = "51:1--51:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2937755", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Applications in risk-critical domains such as emergency management and industrial control systems need near-real-time stream data processing in large-scale sensing networks. The key problem is how to ensure online end-to-end security (e.g., confidentiality, integrity, and authenticity) of data streams for such applications. We refer to this as an online security verification problem. Existing data security solutions cannot be applied in such applications as they cannot deal with data streams with high-volume and high-velocity data in real time. They introduce a significant buffering delay during security verification, resulting in a requirement for a large buffer size for the stream processing server. To address this problem, we propose a Dynamic Key-Length-Based Security Framework (DLSeF) based on a shared key derived from synchronized prime numbers; the key is dynamically updated at short intervals to thwart potential attacks to ensure end-to-end security. 
Theoretical analyses and experimental results of the DLSeF framework show that it can significantly improve the efficiency of processing stream data by reducing the security verification time and buffer usage without compromising security.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "51", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Song:2017:SSI, author = "Jun Song and Fan Yang and Kim-Kwang Raymond Choo and Zhijian Zhuang and Lizhe Wang", title = "{SIPF}: a Secure Installment Payment Framework for Drive-Thru {Internet}", journal = j-TECS, volume = "16", number = "2", pages = "52:1--52:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3014584", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Ensuring the security and privacy of vehicular ad hoc networks (VANETs) and related services such as secure payment has been the focus of recent research efforts. Existing secure payment solutions generally require stable and reliable network connection. This is, however, a challenge in a VANET setting. Drive-thru Internet, a secure payment solution for VANETs, involves a great number of fast-moving vehicles competing for connections/communications simultaneously. Thus, service providers may find it challenging to provide real-time payment services or may have to sacrifice the confidentiality and the authenticity of payment vouchers for usability. In this article, we propose a secure installment payment framework for drive-thru Internet deployment in a VANET setting. 
The framework also provides the capability to embody properties such as confidentiality of payment vouchers, offline signature verification, periodical reconciliation, and installment payment. Performance evaluation and security analysis demonstrate the utility of the framework in a VANET setting.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "52", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Liu:2017:EEC, author = "Zhe Liu and Jian Weng and Zhi Hu and Hwajeong Seo", title = "Efficient Elliptic Curve Cryptography for Embedded Devices", journal = j-TECS, volume = "16", number = "2", pages = "53:1--53:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2967103", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Many resource-constrained embedded devices, such as wireless sensor nodes, require public key encryption or a digital signature, which has induced plenty of research on efficient and secure implementation of elliptic curve cryptography (ECC) on 8-bit processors. In this work, we study the suitability of a special class of finite fields, called optimal prime fields (OPFs), for a ``lightweight'' ECC implementation with a view toward high performance and security. First, we introduce a highly optimized arithmetic library for OPFs that includes two implementations for each finite field arithmetic operation, namely a performance-optimized version and a security-optimized variant. The latter is resistant against simple power analysis attacks in the sense that it always executes the same sequence of instructions, independent of the operands. 
Based on this OPF library, we then describe a performance-optimized and a security-optimized implementation of scalar multiplication on the elliptic curve over OPFs at several security levels. The former uses the Gallant-Lambert-Vanstone method on twisted Edwards curves and reaches an execution time of 3.14M cycles (over a 160-bit OPF) on an 8-bit ATmega128 processor, whereas the latter is based on a Montgomery curve and executes in 5.53M cycles.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "53", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Fu:2017:DFA, author = "Shan Fu and Guoai Xu and Juan Pan and Zongyue Wang and An Wang", title = "Differential Fault Attack on {ITUbee} Block Cipher", journal = j-TECS, volume = "16", number = "2", pages = "54:1--54:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2967610", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Differential Fault Attack (DFA) is a powerful cryptanalytic technique to retrieve secret keys by exploiting the faulty ciphertexts generated during encryption procedure. This article proposes a novel DFA attack that is effective on ITUbee, a software-oriented block cipher for resource-constrained devices. Different from other DFA, our attack makes use of not only faulty values, but also differences between fault-free intermediate values corresponding to 2 plaintexts, which combine traditional differential analysis with DFA. The possible injection positions with different number of faults are discussed. 
The most efficient attack takes 2$^{25}$ round function operations with 4 faults, which is achieved in a few seconds on a PC.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "54", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Li:2017:RNF, author = "Yang Li and Mengting Chen and Zhe Liu and Jian Wang", title = "Reduction in the Number of Fault Injections for Blind Fault Attack on {SPN} Block Ciphers", journal = j-TECS, volume = "16", number = "2", pages = "55:1--55:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3014583", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In 2014, a new fault analysis called blind fault attack (BFA) was proposed, in which attackers can only obtain the number of different faulty outputs without knowing the public data. The original BFA requires 480,000 fault injections to recover a 128-bit AES key. This work attempts to reduce the number of fault injections under the same attack assumptions. We analyze BFA from an information theoretical perspective and introduce a new probability-based distinguisher. Three approaches are proposed for different attack scenarios. The best one realized a 66.8\% reduction of the number of fault injections on AES.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "55", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Castiglione:2017:BFI, author = "Arcangelo Castiglione and Raffaele Pizzolante and Francesco Palmieri and Barbara Masucci and Bruno Carpentieri and Alfredo {De Santis} and Aniello Castiglione", title = "On-Board Format-Independent Security of Functional Magnetic Resonance Images", journal = j-TECS, volume = "16", number = "2", pages = "56:1--56:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2893474", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Functional magnetic resonance imaging (fMRI) provides an effective and noninvasive tool for researchers to understand cerebral functions and correlate them with brain activities. In addition, with the ever-increasing diffusion of the Internet, such images may be exchanged in several ways, allowing new research and medical services. On the other hand, ensuring the security of exchanged fMRI data becomes a main concern due to their special characteristics arising from strict ethics and legislative and diagnostic implications. Again, the risks increase when dealing with open environments like the Internet. For this reason, security mechanisms that ensure protection of such data are strongly required. However, we remark that the mechanisms commonly employed for data protection are doomed to fail when dealing with imaging data. In this article, we propose a novel watermarking scheme explicitly addressed for this type of imaging. Such a scheme can be used for several purposes, particularly to ensure authenticity and integrity. Moreover, we show how to integrate our scheme within commercial off-the-shelf fMRI system. 
Finally, the validity and the efficiency of our scheme has been assessed through testing.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "56", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Liu:2017:PMH, author = "Jianghua Liu and Jinhua Ma and Wei Wu and Xiaofeng Chen and Xinyi Huang and Li Xu", title = "Protecting Mobile Health Records in Cloud Computing: a Secure, Efficient, and Anonymous Design", journal = j-TECS, volume = "16", number = "2", pages = "57:1--57:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2983625", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Electronic healthcare (eHealth) systems have replaced traditional paper-based medical systems due to attractive features such as universal accessibility, high accuracy, and low cost. As a major constituent part of eHealth systems, mobile healthcare (mHealth) applies Mobile Internet Devices (MIDs) and Embedded Devices (EDs), such as tablets, smartphones, and other devices embedded in the bodies of individuals, to improve the quality of life and provide more convenient healthcare services for patients. Unfortunately, MIDs and EDs have only limited computational capacity, storage space, and power supply. By taking this into account, we present a new design to guarantee the integrity of eHealth records and the anonymity of the data owner in a more efficient and flexible way. The essence of our design is a general method which can convert any secure Attribute-Based Signature (ABS) scheme into a highly efficient and secure Online/Offline Attribute-Based Signature (OOABS) scheme. We prove the security and analyze the efficiency improvement of the new design. 
Additionally, we illustrate the proposed generic construction by applying it to a specific ABS scheme.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "57", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wang:2017:SRS, author = "Wei Wang and Peng Xu and Laurence Tianruo Yang and Willy Susilo and Jinjun Chen", title = "Securely Reinforcing Synchronization for Embedded Online Contests", journal = j-TECS, volume = "16", number = "2", pages = "58:1--58:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2899000", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "When competing in eBay bidding, online games, or e-exams in embedded computing environments, people naturally face asynchronous starts from different computing devices, which is treated as a security risk of online contests. The security risks of online contests also include eavesdropping during data transmission without intended rights, and false starts by malicious competitors, which also means asynchrony in contests. Accordingly, online contests need security guarantees, especially on synchronization. In this article, for synchronic and secure starts in a contest, we update security requirements of confidentiality, anonymity, and synchrony, comparing the current work to our previous work. Based on the updated requirements, we propose a general framework for the Advanced Secure Synchronized Reading (ASSR) system, which can hold multiple contests simultaneously in the cloud. It is important to note that the system can ignore the impacts of heterogeneity among competitors. 
Considering the heterogeneity both on transmission and computing, we construct a novel Randomness-reused Identity Based Key Encapsulation Mechanism (RIBKEM) to support separable decapsulation, which can shorten both decryption delay and transmission delay with the best efforts. Finally, ASSR enhances synchronization achievement for contest starts with heterogeneous delays of competitors while satisfying other security requirements. As a complement, the analysis on the provable security of ASSR is given, as well as a further analysis on the achievement of synchronization.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "58", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mozaffari-Kermani:2017:FDA, author = "Mehran Mozaffari-Kermani and Reza Azarderakhsh and Anita Aghaie", title = "Fault Detection Architectures for Post-Quantum Cryptographic Stateless Hash-Based Secure Signatures Benchmarked on {ASIC}", journal = j-TECS, volume = "16", number = "2", pages = "59:1--59:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2930664", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/hash.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Symmetric-key cryptography can resist the potential post-quantum attacks expected with the not-so-faraway advent of quantum computing power. Hash-based, code-based, lattice-based, and multivariate-quadratic equations are all other potential candidates, the merit of which is that they are believed to resist both classical and quantum computers, and applying ``Shor's algorithm''---the quantum-computer discrete-logarithm algorithm that breaks classical schemes---to them is infeasible.
In this article, we propose, assess, and benchmark reliable constructions for stateless hash-based signatures. Such architectures are believed to be one of the prominent post-quantum schemes, offering security proofs relative to plausible properties of the hash function; however, it is well known that their confidentiality does not guarantee reliable architectures in the presence natural and malicious faults. We propose and benchmark fault diagnosis methods for this post-quantum cryptography variant through case studies for hash functions and present the simulations and implementations results (through application-specific integrated circuit evaluations) to show the applicability of the presented schemes. The proposed approaches make such hash-based constructions more reliable against natural faults and help protecting them against malicious faults and can be tailored based on the resources available and for different reliability objectives.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "59", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Gai:2017:SES, author = "Keke Gai and Longfei Qiu and Min Chen and Hui Zhao and Meikang Qiu", title = "{SA--EAST}: Security-Aware Efficient Data Transmission for {ITS} in Mobile Heterogeneous Cloud Computing", journal = j-TECS, volume = "16", number = "2", pages = "60:1--60:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2979677", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The expected advanced network explorations and the growing demand for mobile data sharing and transferring have driven numerous novel applications in Cyber-Physical Systems (CPSs), such as Intelligent Transportation Systems (ITSs). 
However, current ITS implementations are restricted by the conflicts between security and communication efficiency. Focusing on this issue, this article proposes a Security-Aware Efficient Data Sharing and Transferring (SA-EAST) model, which is designed for securing cloud-based ITS implementations. In applying this approach, we aim to obtain secure real-time multimedia data sharing and transferring. Our experimental evaluation has shown that our proposed model provides an effective performance in securing communications for ITS.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "60", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Shu:2017:WDD, author = "Junliang Shu and Yuanyuan Zhang and Juanru Li and Bodong Li and Dawu Gu", title = "Why Data Deletion Fails? {A} Study on Deletion Flaws and Data Remanence in {Android} Systems", journal = j-TECS, volume = "16", number = "2", pages = "61:1--61:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3007211", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Smart mobile devices are becoming the main vessel of personal privacy information. While they carry valuable information, data erasure is somehow much more vulnerable than was predicted. The security mechanisms provided by the Android system are not flexible enough to thoroughly delete sensitive data. In addition to the weakness among several provided data-erasing and file-deleting mechanisms, we also target the Android OS design flaws in data erasure, and unveil that the design of the Android OS contradicts some secure data-erasure demands. 
We present the data-erasure flaws in three typical scenarios on mainstream Android devices, such as the data clearing flaw, application uninstallation flaw, and factory reset flaw. Some of these flaws are inherited data-deleting security issues from the Linux kernel, and some are new vulnerabilities in the Android system. Those scenarios reveal the data leak points in Android systems. Moreover, we reveal that the data remanence on the disk is rarely affected by the user's daily operation, such as file deletion and app installation and uninstallation, by a real-world data deletion latency experiment. After one volunteer used the Android phone for 2 months, the data remanence amount was still considerable. Then, we proposed DataRaider for file recovering from disk fragments. It adopts a file-carving technique and is implemented as an automated sensitive information recovering framework. DataRaider is able to extract private data in a raw disk image without any file system information, and the recovery rate is considerably high in the four test Android phones. We propose some mitigation for data remanence issues, and give the users some suggestions on data protection in Android systems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "61", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Shukla:2017:ECS, author = "Sandeep K. Shukla", title = "Editorial: Cyber Security, {IoT}, Block Chains---Risks and Opportunities", journal = j-TECS, volume = "16", number = "3", pages = "62:1--62:??", month = jul, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3087913", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "62", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wolf:2017:GES, author = "Marilyn Wolf and Jason Xue", title = "Guest Editorial: Special Issue on Embedded Computing for {IoT}", journal = j-TECS, volume = "16", number = "3", pages = "63:1--63:??", month = jul, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3065713", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "63", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ateniese:2017:LCS, author = "Giuseppe Ateniese and Giuseppe Bianchi and Angelo T. Capossele and Chiara Petrioli and Dora Spenza", title = "Low-Cost Standard Signatures for Energy-Harvesting Wireless Sensor Networks", journal = j-TECS, volume = "16", number = "3", pages = "64:1--64:??", month = jul, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2994603", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This work is motivated by a general question: can micro-scale energy-harvesting techniques be exploited to support low-cost standard security solutions on resource-constrained devices? We focus on guaranteeing integrity and authentication in Internet of Things (IoT) and Wireless Sensor Network (WSN) applications. In this article, we propose techniques to make ECDSA signatures low cost and implementable on resource-constrained devices. 
By combining precomputation techniques and energy-harvesting capabilities of modern sensor nodes, we achieve significant improvement over prior works. In addition, we show that the cost of ECDSA signatures can be reduced by up to a factor 10 by using harvesting-aware optimizations.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "64", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Jayakumar:2017:EAM, author = "Hrishikesh Jayakumar and Arnab Raha and Jacob R. Stevens and Vijay Raghunathan", title = "Energy-Aware Memory Mapping for Hybrid {FRAM--SRAM} {MCUs} in Intermittently-Powered {IoT} Devices", journal = j-TECS, volume = "16", number = "3", pages = "65:1--65:??", month = jul, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2983628", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Forecasts project that by 2020, there will be around 50 billion devices connected to the Internet of Things (IoT), most of which will operate untethered and unplugged. While environmental energy harvesting is a promising solution to power these IoT edge devices, it introduces new complexities due to the unreliable nature of ambient energy sources. In the presence of an unreliable power supply, frequent checkpointing of the system state becomes imperative, and recent research has proposed the concept of in-situ checkpointing by using ferroelectric RAM (FRAM), an emerging non-volatile memory technology, as unified memory in these systems. Even though an entirely FRAM-based solution provides reliability, it is energy inefficient compared to SRAM due to the higher access latency of FRAM. 
On the other hand, an entirely SRAM-based solution is highly energy efficient but is unreliable in the face of power loss. This paper advocates an intermediate approach in hybrid FRAM-SRAM microcontrollers that involves judicious memory mapping of program sections to retain the reliability benefits provided by FRAM while performing almost as efficiently as an SRAM-based system. We propose an energy-aware memory mapping technique that maps different program sections to the hybrid FRAM-SRAM microcontroller such that energy consumption is minimized without sacrificing reliability. Our technique consists of eM-map, which performs a one-time characterization to find the optimal memory map for the functions that constitute a program and energy-align, a novel hardware-software technique that aligns the system's powered-on time intervals to function execution boundaries, which results in further improvements in energy efficiency and performance. Experimental results obtained using the MSP430FR5739 microcontroller demonstrate a significant performance improvement of up to 2x and energy reduction of up to 20\% over a state-of-the-art FRAM-based solution. Finally, we present a case study that shows the implementation of our techniques in the context of a real IoT application.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "65", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Tiloca:2017:ADB, author = "Marco Tiloca and Kirill Nikitin and Shahid Raza", title = "{Axiom}: {DTLS}-Based Secure {IoT} Group Communication", journal = j-TECS, volume = "16", number = "3", pages = "66:1--66:??", month = jul, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3047413", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article presents Axiom, a DTLS-based approach to efficiently secure multicast group communication among IoT-constrained devices. Axiom provides an adaptation of the DTLS record layer, relies on key material commonly shared among the group members, and does not require one to perform any DTLS handshake. We made a proof-of-concept implementation of Axiom based on the tinyDTLS library for the Contiki OS and used it to experimentally evaluate performance of our approach on real IoT hardware. Results show that Axiom is affordable on resource-constrained platforms and performs significantly better than related alternative approaches.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "66", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chatterjee:2017:PBS, author = "Urbi Chatterjee and Rajat Subhra Chakraborty and Debdeep Mukhopadhyay", title = "A {PUF}-Based Secure Communication Protocol for {IoT}", journal = j-TECS, volume = "16", number = "3", pages = "67:1--67:??", month = jul, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3005715", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Security features are of paramount importance for the Internet of Things (IoT), and implementations are challenging given the resource-constrained IoT setup. We have developed a lightweight identity-based cryptosystem suitable for IoT to enable secure authentication and message exchange among the devices. Our scheme employs a Physically Unclonable Function (PUF) to generate the public identity of each device, which is used as the public key for each device for message encryption. We have provided formal proofs of security in the Session Key Security and Universally Composable Framework of the proposed protocol, which demonstrates the resilience of the scheme against passive and active attacks. We have demonstrated the setup required for the protocol implementation and shown that the proposed protocol implementation incurs low hardware and software overhead.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "67", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Liu:2017:PSSa, author = "Anfeng Liu and Xiao Liu and Zhipeng Tang and Laurence T. 
Yang and Zili Shao", title = "Preserving Smart Sink-Location Privacy with Delay Guaranteed Routing Scheme for {WSNs}", journal = j-TECS, volume = "16", number = "3", pages = "68:1--68:??", month = jul, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2990500", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "A Semi Random Circle routing for mobile Sink joint Ray Routing for data (SRCRR) scheme is proposed for preserving sink-location privacy with a delay guaranteed. In the SRCRR scheme, the data are directionally routed along ray paths and stored at intermediate nodes probabilistically. The Sink moves in a semirandom circular pattern to collect data from the local nodes occasionally, which guarantees that the data will be collected with an acceptable delay and prevents attackers from predicting their locations and movements. The experimental results indicate that the performance of the SRCRR scheme is better than that of the previous schemes.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "68", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Bennett:2017:DDS, author = "Terrell R. 
Bennett and Nicholas Gans and Roozbeh Jafari", title = "Data-Driven Synchronization for {Internet-of-Things} Systems", journal = j-TECS, volume = "16", number = "3", pages = "69:1--69:??", month = jul, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2983627", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The Internet of Things (IoT) is fueled by the growth of sensors, actuators, and services that collect and process raw sensor data. Wearable and environmental sensors will be a major component of the IoT and provide context about people and activities that are occurring. It is imperative that sensors in the IoT are synchronized, which increases the usefulness and value of the sensor data and allows data from multiple sources to be combined and compared. Due to the heterogeneous nature of sensors (e.g., synchronization protocols, communication channels, etc.), synchronization can be difficult. In this article, we present novel techniques for synchronizing data from multi-sensor environments based on the events and interactions measured by the sensors. We present methods to determine which interactions can likely be used for synchronization and methods to improve synchronization by removing erroneous synchronization points. We validate our technique through experiments with wearable and environmental sensors in a laboratory environment. Experiments resulted in median drift error reduction from 66\% to 98\% for sensors synchronized through physical interactions.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "69", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Shen:2017:MQC, author = "Zhaoyan Shen and Zhijian He and Shuai Li and Qixin Wang and Zili Shao", title = "A Multi-Quadcopter Cooperative Cyber-Physical System for Timely Air Pollution Localization", journal = j-TECS, volume = "16", number = "3", pages = "70:1--70:??", month = jul, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3005716", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We propose a cyber-physical system of unmanned quadcopters to locate air pollution sources in a timely manner. The system consists of a physical part and a cyber part. The physical part includes unmanned quadcopters equipped with multiple sensors. The cyber part carries out control laws. We simplify the control laws by decoupling the quadcopters' horizontal-plane motion control from vertical motion control. To control the quadcopter's horizontal-plane motions, we propose a controller that combines pollutant dynamics with quadcopter physics. To control the quadcopter's vertical motions, we adopt an anti-windup proportional-integral (PI) controller. We further extend the horizontal-plane control laws from a single quadcopter to multiple quadcopters. The multi-quadcopter control laws are distributed and convergent. We implement a prototype quadcopter and carry out experiments to verify the vertical control laws. We also carry out simulations to evaluate the horizontal-plane control laws. With quadcopter parameters set commensurate with our prototype implementation's, our simulations show that the control laws can drive quadcopters to locate pollution source(s) in a timely way.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "70", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wu:2017:SVA, author = "Jian Wu and Roozbeh Jafari", title = "Seamless Vision-assisted Placement Calibration for Wearable Inertial Sensors", journal = j-TECS, volume = "16", number = "3", pages = "71:1--71:??", month = jul, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3023364", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Wearable inertial devices are being widely used in the applications of activity tracking, health care, and professional sports, and their usage is on a rapid rise. Signal processing algorithms for these devices are often designed to work with a known location of the wearable sensor on the body. However, in reality, the wearable sensor may be worn at different body locations due to the user's preference or unintentional misplacement. The calibration of the sensor location is important to ensure that the algorithms operate correctly. In this article, we propose an auto-calibration technique for determining the location of wearables on the body by fusing the 3-axis accelerometer data from the devices and three-dimensional camera (i.e., Kinect) information obtained from the environment. The automatic calibration is achieved by a cascade decision-tree-based classifier on top of the minimum least-squares errors obtained by solving Wahba's problem, operating on heterogeneous sensors. The core contribution of our work is that there is no extra burden on the user as a result of this technique. The calibration is done seamlessly, leveraging sensor fusion in an Internet-of-Things setting opportunistically when the user is present in front of an environmental camera performing arbitrary movements. 
Our approach is evaluated with two different types of movements: simple actions (e.g., sit-to-stand or picking up phone) and complicated tasks (e.g., cooking or playing basketball), yielding 100\% and 82.56\% recall for simple actions and for complicated tasks, respectively, in determining the correct location of sensors.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "71", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chen:2017:GEA, author = "Bo-Wei Chen and Wen Ji and Zhu Li", title = "Guest Editorial for {ACM TECS} Special Issue on Effective Divide-and-Conquer, Incremental, or Distributed Mechanisms of Embedded Designs for Extremely Big Data in Large-Scale Devices", journal = j-TECS, volume = "16", number = "3", pages = "72:1--72:??", month = jul, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3068457", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "72", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Liu:2017:DMR, author = "Anfeng Liu and Xiao Liu and Tianyi Wei and Laurence T. 
Yang and Seungmin (Charlie) Rho and Anand Paul", title = "Distributed Multi-Representative Re-Fusion Approach for Heterogeneous Sensing Data Collection", journal = j-TECS, volume = "16", number = "3", pages = "73:1--73:??", month = jul, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2974021", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "A multi-representative re-fusion (MRRF) approximate data collection approach is proposed in which multiple nodes with similar readings form a data coverage set (DCS). The reading value of the DCS is represented by an R-node. The set near the Sink is smaller, while the set far from the Sink is larger, which can reduce the energy consumption in hotspot areas. Then, a distributed data-aggregation strategy is proposed that can re-fuse the value of R-nodes that are far from each other but have similar readings. Both comprehensive theoretical and experimental results indicate that the MRRF approach increases lifetime and energy efficiency.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "73", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chen:2017:LBD, author = "Xiaogang Chen and Z. Jane Wang and Xiangyang Ji", title = "A Load-Balancing Divide-and-Conquer {SVM} Solver", journal = j-TECS, volume = "16", number = "3", pages = "74:1--74:??", month = jul, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3005347", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Scaling up kernel support vector machine (SVM) training has been an important topic in recent years. 
Despite its theoretical elegance, training kernel SVM is impractical when facing millions of data. The divide-and-conquer (DC) strategy is a natural framework of handling gigantic problems, and the divide-and-conquer solver for kernel SVM (DC-SVM) is able to train kernel SVM with millions of data with limited time cost. However, there are some drawbacks of the DC-SVM approach. First, it used an unsupervised clustering method to partition the whole problem, which is prone to construct singular subsets, and, second, it is hard to balance the computation load between sub-problems. To address these issues, this article proposed a load-balancing partition method for kernel SVM. First, it clusters sample from one class and then assigns data samples to the cluster centers by a distance measure and construct sub-problems; in this way, it is able to control the computation load and avoid singular problems. Experimental results show that the proposed method has better load-balancing performance than DC-SVM, which implies that it is suitable for distributed and embedding systems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "74", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Liu:2017:PSSb, author = "Bo Liu and Xiao-Tong Yuan and Yang Yu and Qingshan Liu and Dimitris N. Metaxas", title = "Parallel Sparse Subspace Clustering via Joint Sample and Parameter Blockwise Partition", journal = j-TECS, volume = "16", number = "3", pages = "75:1--75:??", month = jul, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3063316", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 24 09:51:12 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Sparse subspace clustering (SSC) is a classical method to cluster data with specific subspace structure for each group. 
It has many desirable theoretical properties and has been shown to be
                     effective in various applications. However, under the
                     condition of a large-scale dataset, learning the sparse
                     sample affinity graph is computationally expensive. To
                     tackle the computation time cost challenge, we develop a
                     memory-efficient parallel framework for computing SSC via
                     an alternating direction method of multiplier (ADMM)
                     algorithm. The proposed framework partitions the data
                     matrix into column blocks and then decomposes the
                     original problem into parallel multivariate Lasso
                     regression subproblems and samplewise operations. The
                     proposed method allows us to allocate multiple
                     cores/machines for the processing of individual column
                     blocks. We propose a stochastic optimization algorithm to
                     minimize the objective function. Experimental results on
                     real-world datasets demonstrate that the proposed
                     blockwise ADMM framework is substantially more efficient
                     than its matrix counterpart used by SSC, without
                     sacrificing performance in applications. Moreover, our
                     approach is directly applicable to parallel neighborhood
                     selection for Gaussian graphical models structure
                     estimation.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "75",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}

@Article{Kung:2017:CPD,
  author          = "Sun-Yuan Kung and Thee Chanyaswad and J. Morris Chang and
                     Peiyuan Wu",
  title           = "Collaborative {PCA\slash DCA} Learning Methods for
                     Compressive Privacy",
  journal         = j-TECS,
  volume          = "16",
  number          = "3",
  pages           = "76:1--76:??",
  month           = jul,
  year            = "2017",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2996460",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Jul 24 09:51:12 MDT 2017",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "In the Internet era, the data being collected on
                     consumers like us are growing exponentially, and attacks
                     on our privacy are becoming a real threat. To better
                     ensure our privacy, it is safer to let the data owner
                     control the data to be uploaded to the network as opposed
                     to taking chance with data servers or third parties. To
                     this end, we propose compressive privacy, a
                     privacy-preserving technique to enable the data creator
                     to compress data via collaborative learning so that the
                     compressed data uploaded onto the Internet will be useful
                     only for the intended utility and not be easily diverted
                     to malicious applications. For data in a high-dimensional
                     feature vector space, a common approach to data
                     compression is dimension reduction or, equivalently,
                     subspace projection. The most prominent tool is principal
                     component analysis (PCA). For unsupervised learning, PCA
                     can best recover the original data given a specific
                     reduced dimensionality. However, for the supervised
                     learning environment, it is more effective to adopt a
                     supervised PCA, known as discriminant component analysis
                     (DCA), to maximize the discriminant capability. The DCA
                     subspace analysis embraces two different subspaces. The
                     signal-subspace components of DCA are associated with the
                     discriminant distance/power (related to the
                     classification effectiveness), whereas the noise subspace
                     components of DCA are tightly coupled with recoverability
                     and/or privacy protection.
This article presents three DCA-related data compression methods useful
                     for privacy-preserving applications: --- Utility-driven
                     DCA: Because the rank of the signal subspace is limited
                     by the number of classes, DCA can effectively support
                     classification using a relatively small dimensionality
                     (i.e., high compression). --- Desensitized PCA: By
                     incorporating a signal-subspace ridge into DCA, it leads
                     to a variant especially effective for extracting
                     privacy-preserving components. In this case, the
                     eigenvalues of the noise-space are made to become
                     insensitive to the privacy labels and are ordered
                     according to their corresponding component powers. ---
                     Desensitized K-means/SOM: Since the revelation of the
                     K-means or SOM cluster structure could leak sensitive
                     information, it is safer to perform K-means or SOM
                     clustering on a desensitized PCA subspace.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "76",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}

@Article{Fleming:2017:CDI,
  author          = "Tom Fleming and Huang-Ming Huang and Alan Burns and
                     Chris Gill and Sanjoy Baruah and Chenyang Lu",
  title           = "Corrections to and Discussion of {``Implementation and
                     Evaluation of Mixed-criticality Scheduling Approaches for
                     Sporadic Tasks''}",
  journal         = j-TECS,
  volume          = "16",
  number          = "3",
  pages           = "77:1--77:??",
  month           = jul,
  year            = "2017",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2974020",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Jul 24 09:51:12 MDT 2017",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  note            = "See \cite{Huang:2014:IEM}.",
  abstract        = "The AMC-IA mixed-criticality scheduling analysis was
                     proposed as an improvement to the AMC-MAX adaptive
                     mixed-criticality scheduling analysis. However, we have
                     identified several necessary corrections to the AMC-IA
                     analysis.
In this article, we motivate and describe those corrections, and discuss
                     and illustrate why the corrected AMC-IA analysis cannot
                     be shown to outperform AMC-MAX.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "77",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}

@Article{Bouraoui:2017:HAE,
  author          = "Hasna Bouraoui and Chadlia Jerad and Anupam Chattopadhyay
                     and Nejib Ben Hadj-Alouane",
  title           = "Hardware Architectures for Embedded Speaker Recognition
                     Applications: a Survey",
  journal         = j-TECS,
  volume          = "16",
  number          = "3",
  pages           = "78:1--78:??",
  month           = jul,
  year            = "2017",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2975161",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Jul 24 09:51:12 MDT 2017",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "Authentication technologies based on biometrics, such as
                     speaker recognition, are attracting more and more
                     interest thanks to the elevated level of security offered
                     by these technologies. Despite offering many advantages,
                     such as remote use and low vulnerability, speaker
                     recognition applications are constrained by the heavy
                     computational effort and the hard real-time constraints.
                     When such applications are run on an embedded platform,
                     the problem becomes more challenging, as additional
                     constraints inherent to this specific domain are added.
                     In the literature, different hardware architectures were
                     used/designed for implementing a process with a focus on
                     a given particular metric. In this article, we give a
                     survey of the state-of-the-art works on implementations
                     of embedded speaker recognition applications.
Our aim is to provide an overview of the different approaches dealing with
                     acceleration techniques oriented towards speaker and
                     speech recognition applications and attempt to identify
                     the past, current, and future research trends in the
                     area. Indeed, on the one hand, many flexible solutions
                     were implemented, using either General Purpose Processors
                     or Digital Signal Processors. In general, these types of
                     solutions suffer from low area and energy efficiency. On
                     the other hand, high-performance solutions were
                     implemented on Application Specific Integrated Circuits
                     or Field Programmable Gate Arrays but at the expense of
                     flexibility. Based on the available results, we compare
                     the application requirements vis-{\`a}-vis the
                     performance achieved by the systems. This leads to the
                     projection of new research trends that can be undertaken
                     in the future.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "78",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}

@Article{Xu:2017:AFA,
  author          = "Ye Xu and Israel Koren and C. Mani Krishna",
  title           = "{AdaFT}: a Framework for Adaptive Fault Tolerance for
                     Cyber-Physical Systems",
  journal         = j-TECS,
  volume          = "16",
  number          = "3",
  pages           = "79:1--79:??",
  month           = jul,
  year            = "2017",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2980763",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Jul 24 09:51:12 MDT 2017",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "Cyber-physical systems (CPS) frequently have to use
                     massive redundancy to meet application requirements for
                     high reliability. While such redundancy is required, it
                     can be activated adaptively, based on the current state
                     of the controlled plant. Most of the time, the plant is
                     in a state that allows for a lower level of fault
                     tolerance.
Avoiding the continuous deployment of massive fault tolerance will greatly
                     reduce the workload of the CPS, and lower the operating
                     temperature of the cyber sub-system, thus increasing its
                     reliability. In this article, we extend our prior
                     research by demonstrating a software simulation framework
                     Adaptive Fault Tolerance (AdaFT) that can automatically
                     generate the sub-spaces within which our adaptive fault
                     tolerance can be applied. We also show the theoretical
                     benefits of AdaFT and its actual implementation in
                     several real-world CPSs.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "79",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}

@Article{Pagliari:2017:ABC,
  author          = "Daniele Jahier Pagliari and Mario R. Casu and Luca P.
                     Carloni",
  title           = "Accelerators for Breast Cancer Detection",
  journal         = j-TECS,
  volume          = "16",
  number          = "3",
  pages           = "80:1--80:??",
  month           = jul,
  year            = "2017",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2983630",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Jul 24 09:51:12 MDT 2017",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "Algorithms used in microwave imaging for breast cancer
                     detection require hardware acceleration to speed up
                     execution time and reduce power consumption. In this
                     article, we present the hardware implementation of two
                     accelerators for two alternative imaging algorithms that
                     we obtain entirely from SystemC specifications via
                     high-level synthesis.
The two algorithms present opposite characteristics that stress the design
                     process and the capabilities of commercial HLS tools in
                     different ways: the first is communication bound and
                     requires overlapping and pipelining of communication and
                     computation in order to maximize the application
                     throughput; the second is computation bound and uses
                     complex mathematical functions that HLS tools do not
                     directly support. Despite these difficulties, thanks to
                     HLS, in the span of only 4 months we were able to explore
                     a large design space and derive about 100 implementations
                     with different cost-performance profiles, targeting both
                     a Field-Programmable Gate Array (FPGA) platform and a
                     32-nm standard-cell Application Specific Integrated
                     Circuit (ASIC) library. In addition, we could obtain
                     results that outperform a previous Register-Transfer
                     Level (RTL) implementation, which confirms the remarkable
                     progress of HLS tools.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "80",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}

@Article{Chen:2017:SBT,
  author          = "Jiunn-Yeu Chen and Wuu Yang and Wei-Chung Hsu and
                     Bor-Yeh Shen and Quan-Huei Ou",
  title           = "On Static Binary Translation of {ARM\slash Thumb} Mixed
                     {ISA} Binaries",
  journal         = j-TECS,
  volume          = "16",
  number          = "3",
  pages           = "81:1--81:??",
  month           = jul,
  year            = "2017",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2996458",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Jul 24 09:51:12 MDT 2017",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "Code discovery has been a main challenge for static
                     binary translation, especially when the source
                     instruction set architecture has variable-length
                     instructions, such as the x86 architectures. Due to
                     embedded data such as PC (program counter)-relative data,
                     jump tables, or paddings in the code section, a binary
                     translator may be misled to translate data as
                     instructions. For variable-length instructions, once a
                     piece of data is mis-translated as instructions, decoding
                     subsequent bytes could also go wrong. We are concerned
                     with static binary translation for the very popular
                     Advanced RISC Machine (ARM) architectures. Although ARM
                     is considered a reduced instruction set computer
                     architecture, it does allow the mix of 32-bit (ARM)
                     instructions and 16-bit (Thumb) instructions in the same
                     executables. In addition to different instruction
                     lengths, the ARM and Thumb instructions are located at
                     4-byte or 2-byte aligned addresses, respectively.
                     Furthermore, because ARM and Thumb instructions share the
                     same encoding space, a 4-byte word could sometimes be
                     decoded as one ARM instruction or two Thumb instructions.
                     The correct decoding of this 4-byte word is actually
                     determined at runtime by the least-significant bit of the
                     program counter. For unstripped binaries, the mapping
                     symbols can be used to identify ARM code regions and
                     Thumb code regions. However, for stripped binaries, such
                     mapping symbols are unavailable. We propose a novel
                     solution to statically translate stripped ARM/Thumb mixed
                     executables. Our solution is implemented in a static
                     binary translator. The binary translator further
                     generates multiple versions of translated code for the
                     code regions whose types cannot be determined with our
                     solution. One of the code versions is selected during
                     runtime. The binary translator also includes a series of
                     analyses that enable the removal of most useless code
                     versions.
Based on the experimental results on stripped ARM/Thumb mixed binaries in
                     the SPEC2006 and Embedded Microprocessor Benchmark
                     Consortium (EEMBC) benchmark suites, our static binary
                     translator achieves impressive performance when migrating
                     them to run on x86 machines and the space overhead is no
                     more than 10\%.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "81",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}

@Article{Tan:2017:ITM,
  author          = "Wilson M. Tan and Paul Sullivan and Hamish Watson and
                     Joanna Slota-Newson and Stephen A. Jarvis",
  title           = "An Indoor Test Methodology for Solar-Powered Wireless
                     Sensor Networks",
  journal         = j-TECS,
  volume          = "16",
  number          = "3",
  pages           = "82:1--82:??",
  month           = jul,
  year            = "2017",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2994604",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Jul 24 09:51:12 MDT 2017",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "Repeatable and accurate tests are important when
                     designing hardware and algorithms for solar-powered
                     wireless sensor networks (WSNs). Since no two days are
                     exactly alike with regard to energy harvesting, tests
                     must be carried out indoors. Solar simulators are
                     traditionally used in replicating the effects of sunlight
                     indoors; however, solar simulators are expensive, have
                     lighting elements that have short lifetimes, and are
                     usually not designed to carry out the types of tests that
                     hardware and algorithm designers require. As a result,
                     hardware and algorithm designers use tests that are
                     inaccurate and not repeatable (both for others and also
                     for the designers themselves). In this article, we
                     propose an indoor test methodology that does not rely on
                     solar simulators. The test methodology has its basis in
                     astronomy and photovoltaic cell design.
We present a generic design for a test apparatus that can be used in
                     carrying out the test methodology. We also present a
                     specific design that we use in implementing an actual
                     test apparatus. We test the efficacy of our test
                     apparatus and, to demonstrate the usefulness of the test
                     methodology, perform experiments akin to those required
                     in projects involving solar-powered WSNs. Results of the
                     said tests and experiments demonstrate that the test
                     methodology is an invaluable tool for hardware and
                     algorithm designers working with solar-powered WSNs.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "82",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}

@Article{Chen:2017:SUE,
  author          = "Tseng-Yi Chen and Yuan-Hao Chang and Shuo-Han Chen and
                     Nien-I Hsu and Hsin-Wen Wei and Wei-Kuan Shih",
  title           = "On Space Utilization Enhancement of File Systems for
                     Embedded Storage Systems",
  journal         = j-TECS,
  volume          = "16",
  number          = "3",
  pages           = "83:1--83:??",
  month           = jul,
  year            = "2017",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2820488",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Jul 24 09:51:12 MDT 2017",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "Since the mid-2000s, mobile/embedded computing systems
                     conventionally have limited computing power, Random
                     Access Memory (RAM) space, and storage capacity due to
                     the consideration of their cost, energy consumption, and
                     physical size. Recently, some of these systems, such as
                     mobile phone and embedded consumer electronics, have more
                     powerful computing capability, so they manage their data
                     in small flash storage devices (e.g., Embedded Multi
                     Media Card (eMMC) and Secure Digital (SD) cards) with a
                     simple file system. However, the existing file systems
                     usually have low space utilization for managing small
                     files and the tail data of large files.
In this work, we thus propose a dynamic tail packing scheme to enhance the
                     space utilization of file systems over flash storage
                     devices in embedded computing systems by dynamically
                     aggregating/packing the tail data of (small) files
                     together. To evaluate the benefits and overheads of the
                     proposed scheme, we theoretically formulate analysis
                     equations for obtaining the best settings in the dynamic
                     tail packing scheme. Additionally, the proposed scheme
                     was implemented in the file system of Linux operating
                     systems to evaluate its capability. The results
                     demonstrate that the proposed scheme could significantly
                     improve the space utilization of existing file systems.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "83",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}

@Article{Landy:2017:SAS,
  author          = "Aaron Landy and Greg Stitt",
  title           = "Serial Arithmetic Strategies for Improving {FPGA}
                     Throughput",
  journal         = j-TECS,
  volume          = "16",
  number          = "3",
  pages           = "84:1--84:??",
  month           = jul,
  year            = "2017",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2996459",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Jul 24 09:51:12 MDT 2017",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "Serial arithmetic has been shown to offer attractive
                     advantages in area for field-programmable gate array
                     (FPGA) datapaths but suffers from a significant reduction
                     in throughput compared to traditional bit-parallel
                     designs. In this work, we perform a performance and
                     trade-off analysis that counterintuitively shows that,
                     despite the decreased throughput of individual serial
                     operators, replication of serial arithmetic can provide a
                     2.1 $ \times $ average increase in throughput compared to
                     bit-parallel pipelines for common FPGA applications.
We complement this analysis with a novel SerDes architecture that enables
                     existing FPGA pipelines to be replaced with serial logic
                     with potentially higher throughput. We also present a
                     serialized sliding-window architecture that improves
                     average throughput 2.4 $ \times $ compared to existing
                     bit-parallel work.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "84",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}

@Article{Alur:2017:SBR,
  author          = "Rajeev Alur and Vojtech Forejt and Salar Moarref and
                     Ashutosh Trivedi",
  title           = "Schedulability of Bounded-Rate Multimode Systems",
  journal         = j-TECS,
  volume          = "16",
  number          = "3",
  pages           = "85:1--85:??",
  month           = jul,
  year            = "2017",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2996797",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Jul 24 09:51:12 MDT 2017",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "Bounded-rate multimode systems are hybrid systems that
                     switch freely among a finite set of modes, and whose
                     dynamics are specified by a finite number of real-valued
                     variables with mode-dependent rates that vary within
                     given bounded sets. The scheduler repeatedly proposes a
                     time and a mode, while the environment chooses an
                     allowable rate for that mode; the state of the system
                     changes linearly in the direction of the rate. The
                     scheduler aims to keep the state within a safe set, while
                     the environment aims to leave it. We study the problem of
                     existence of a winning scheduler strategy and associated
                     complexity questions.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput.
Syst.",
  articleno       = "85",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}

@Article{Bandari:2017:DBE,
  author          = "Maryam Bandari and Robert Simon and Hakan Aydin",
  title           = "{DMS}-Based Energy Optimizations for Clustered {WSNs}",
  journal         = j-TECS,
  volume          = "16",
  number          = "3",
  pages           = "86:1--86:??",
  month           = jul,
  year            = "2017",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/2998179",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Jul 24 09:51:12 MDT 2017",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "In this article, we consider clustered wireless sensor
                     networks where the nodes harvest energy from the
                     environment. We target performance-sensitive applications
                     that have to collectively send their information to a
                     cluster head by a predefined deadline. The nodes are
                     equipped with Dynamic Modulation Scaling (DMS)-capable
                     wireless radios. DMS provides a tuning knob, allowing us
                     to trade off communication latency with energy
                     consumption. We consider two optimization objectives,
                     maximizing total energy reserves and maximizing the
                     minimum energy level across all nodes. For both
                     objectives, we show that optimal solutions can be
                     obtained by solving Mixed Integer Linear Programming
                     problems. We also develop several fast heuristics that
                     are shown to provide approximate solutions
                     experimentally.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput.
Syst.",
  articleno       = "86",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}

@Article{Devaraj:2017:FTP,
  author          = "Rajesh Devaraj and Arnab Sarkar and Santosh Biswas",
  title           = "Fault-Tolerant Preemptive Aperiodic {RT} Scheduling by
                     Supervisory Control of {TDES} on Multiprocessors",
  journal         = j-TECS,
  volume          = "16",
  number          = "3",
  pages           = "87:1--87:??",
  month           = jul,
  year            = "2017",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3012278",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Jul 24 09:51:12 MDT 2017",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "Safety-critical real-time systems must meet stringent
                     timing and fault-tolerance requirements. This article
                     proposes a methodology for synthesizing an optimal
                     preemptive multiprocessor aperiodic task scheduler using
                     a formal supervisory control framework. The scheduler can
                     tolerate single/multiple permanent processor faults.
                     Further, the synthesis framework has been empowered with
                     a novel BDD-based symbolic computation mechanism to
                     control the exponential state-space complexity of the
                     optimal exhaustive enumeration-oriented synthesis
                     methodology.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "87",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}

@Article{Lu:2017:CDS,
  author          = "Qining Lu and Guanpeng Li and Karthik Pattabiraman and
                     Meeta S. Gupta and Jude A.
Rivers",
  title           = "Configurable Detection of {SDC}-causing Errors in
                     Programs",
  journal         = j-TECS,
  volume          = "16",
  number          = "3",
  pages           = "88:1--88:??",
  month           = jul,
  year            = "2017",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3014586",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Jul 24 09:51:12 MDT 2017",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "Silent Data Corruption (SDC) is a serious reliability
                     issue in many domains, including embedded systems.
                     However, current protection techniques are brittle and do
                     not allow programmers to trade off performance for SDC
                     coverage. Further, many require tens of thousands of
                     fault-injection experiments, which are highly time- and
                     resource-intensive. In this article, we propose two
                     empirical models, SDCTune and SDCAuto, to predict the SDC
                     proneness of a program's data. Both models are based on
                     static and dynamic features of the program alone and do
                     not require fault injections to be performed. The main
                     difference between them is that SDCTune requires manual
                     tuning while SDCAuto is completely automated, using
                     machine-learning algorithms. We then develop an algorithm
                     using both models to selectively protect the most
                     SDC-prone data in the program subject to a given
                     performance overhead bound. Our results show that both
                     models are accurate at predicting the relative SDC rate
                     of an application compared to fault injection, for a
                     fraction of the time taken. Further, in terms of
                     efficiency of detection (i.e., ratio of SDC coverage
                     provided to performance overhead), our technique
                     outperforms full duplication by a factor of 0.78x to
                     1.65x with the SDCTune model and 0.62x to 0.96x with
                     SDCAuto model.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput.
Syst.",
  articleno       = "88",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}

@Article{Huang:2017:FBA,
  author          = "Guoxian Huang and Lei Wang",
  title           = "An {FPGA}-Based Architecture for High-Speed Compressed
                     Signal Reconstruction",
  journal         = j-TECS,
  volume          = "16",
  number          = "3",
  pages           = "89:1--89:??",
  month           = jul,
  year            = "2017",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3056481",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Jul 24 09:51:12 MDT 2017",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "Compressive Sensing (CS) is an emerging research area
                     that allows efficient signal acquisition under the
                     sub-Nyquist rate while still promising reliable data
                     recovery. However, practical applications of CS in
                     hardware platforms are limited as signal reconstruction
                     is still challenging due to its high computational
                     complexity, especially for autonomous real-time signal
                     recovery. In this article, we propose an algorithmic
                     transformation technique referred to as Matrix Inversion
                     Bypass (MIB) to improve the signal recovery efficiency of
                     the Orthogonal Matching Pursuit (OMP)-based CS
                     reconstruction. The basic idea of MIB is to decouple the
                     computations of intermediate signal estimates and matrix
                     inversions, thereby enabling parallel processing of these
                     two time-consuming operations in the OMP algorithm. The
                     proposed MIB naturally leads to a parallel architecture
                     for high-speed dedicated hardware implementations. An
                     FPGA-based implementation is developed with the optimized
                     structure aimed at the efficient utilization of hardware
                     resources while realizing high-speed signal recovery. The
                     proposed architecture can perform the signal recovery at
                     up to 1.4 $ \times $ faster than the OMP-based
                     implementation using almost the same hardware
                     resources.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput.
Syst.",
  articleno       = "89",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}

@Article{Groza:2017:LCL,
  author          = "Bogdan Groza and Stefan Murvay and Anthony {Van
                     Herrewege} and Ingrid Verbauwhede",
  title           = "{LiBrA--CAN}: Lightweight Broadcast Authentication for
                     Controller Area Networks",
  journal         = j-TECS,
  volume          = "16",
  number          = "3",
  pages           = "90:1--90:??",
  month           = jul,
  year            = "2017",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3056506",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Jul 24 09:51:12 MDT 2017",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "Despite realistic concerns, security is still absent from
                     vehicular buses such as the widely used Controller Area
                     Network (CAN). We design an efficient protocol based on
                     efficient symmetric primitives, taking advantage of two
                     innovative procedures: splitting keys between nodes and
                     mixing authentication tags. This results in a higher
                     security level when compromised nodes are in the
                     minority, a realistic assumption for automotive networks.
                     Experiments are performed on state-of-the-art Infineon
                     TriCore controllers, contrasted with low-end Freescale
                     S12X cores, while simulations are provided for the
                     recently released CAN-FD standard. To gain compatibility
                     with existent networks, we also discuss a solution based
                     on CAN+.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "90",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}

@Article{Shukla:2017:ESM,
  author          = "Sandeep K.
Shukla",
  title           = "Editorial: Security of Mobile Devices",
  journal         = j-TECS,
  volume          = "16",
  number          = "4",
  pages           = "91:1--91:??",
  month           = sep,
  year            = "2017",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3129534",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Sat Dec 9 08:24:13 MST 2017",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "91",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}

@Article{Mathew:2017:GES,
  author          = "Jimson Mathew and Rajat Subhra Chakraborty and Dhiraj K.
                     Pradhan",
  title           = "Guest Editorial: Special Issue on {``Secure and
                     Fault-Tolerant Embedded Computing''}",
  journal         = j-TECS,
  volume          = "16",
  number          = "4",
  pages           = "92:1--92:??",
  month           = aug,
  year            = "2017",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3075563",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Aug 14 18:53:33 MDT 2017",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput. Syst.",
  articleno       = "92",
  fjournal        = "ACM Transactions on Embedded Computing Systems",
  journal-URL     = "https://dl.acm.org/loi/tecs",
}

@Article{Ko:2017:PCS,
  author          = "Yohan Ko and Reiley Jeyapaul and Youngbin Kim and
                     Kyoungwoo Lee and Aviral Shrivastava",
  title           = "Protecting Caches from Soft Errors: a Microarchitect's
                     Perspective",
  journal         = j-TECS,
  volume          = "16",
  number          = "4",
  pages           = "93:1--93:??",
  month           = aug,
  year            = "2017",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3063180",
  ISSN            = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L          = "1539-9087",
  bibdate         = "Mon Aug 14 18:53:33 MDT 2017",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract        = "Soft error is one of the most important design concerns
                     in modern embedded systems with aggressive technology
                     scaling.
Among various microarchitectural components in a processor, cache is the
                     most susceptible component to soft errors. Error
                     detection and correction codes are common protection
                     techniques for cache memory due to their design
                     simplicity. In order to design effective protection
                     techniques for caches, it is important to quantitatively
                     estimate the susceptibility of caches without and even
                     with protections. At the architectural level,
                     vulnerability is the metric to quantify the
                     susceptibility of data in caches. However, existing tools
                     and techniques calculate the vulnerability of data in
                     caches through coarse-grained block-level estimation.
                     Further, they ignore common cache protection techniques
                     such as error detection and correction codes. In this
                     article, we demonstrate that our word-level vulnerability
                     estimation is accurate through intensive fault injection
                     campaigns as compared to block-level one. Further, our
                     extensive experiments over benchmark suites reveal
                     several counter-intuitive and interesting results. Parity
                     checking when performed over just reads provides reliable
                     and power-efficient protection than that when performed
                     over both reads and writes. On the other hand, checking
                     error correcting codes only at reads alone can be
                     vulnerable even for single-bit soft errors, while that at
                     both reads and writes provides the perfect reliability.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Embed. Comput.
Syst.", articleno = "93", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Esposito:2017:NMO, author = "Stefano Esposito and Massimo Violante and Marco Sozzi and Marco Terrone and Massimo Traversone", title = "A Novel Method for Online Detection of Faults Affecting Execution-Time in Multicore-Based Systems", journal = j-TECS, volume = "16", number = "4", pages = "94:1--94:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3063313", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Aug 14 18:53:33 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article proposes a bounded interference method, based on statistical evaluations, for online detection and tolerance of any fault capable of causing a deadline miss. The proposed method requires data that can be gathered during the profiling and worst-case execution time (WCET) analysis phase. This article describes the method, its application, and then it presents an avionic mixed-criticality use case for experimental evaluation, considering both dual-core and quad-core platforms. Results show that faults that can cause a timing violation are correctly identified while other faults that do not introduce a significant temporal interference can be tolerated to avoid high recovery overheads.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "94", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Yuce:2017:AFI, author = "Bilgiday Yuce and Nahid Farhady Ghalaty and Chinmay Deshpande and Harika Santapuri and Conor Patrick and Leyla Nazhandali and Patrick Schaumont", title = "Analyzing the Fault Injection Sensitivity of Secure Embedded Software", journal = j-TECS, volume = "16", number = "4", pages = "95:1--95:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3063311", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Aug 14 18:53:33 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Fault attacks on cryptographic software use faulty ciphertext to reverse engineer the secret encryption key. Although modern fault analysis algorithms are quite efficient, their practical implementation is complicated because of the uncertainty that comes with the fault injection process. First, the intended fault effect may not match the actual fault obtained after fault injection. Second, the logic target of the fault attack, the cryptographic software, is above the abstraction level of physical faults. The resulting uncertainty with respect to the fault effects in the software may degrade the efficiency of the fault attack, resulting in many more trial fault injections than the amount predicted by the theoretical fault attack. In this contribution, we highlight the important role played by the processor microarchitecture in the development of a fault attack. We introduce the microprocessor fault sensitivity model to systematically capture the fault response of a microprocessor pipeline. We also propose Microarchitecture-Aware Fault Injection Attack (MAFIA). MAFIA uses the fault sensitivity model to guide the fault injection and to predict the fault response. 
We describe two applications for MAFIA. First, we demonstrate a biased fault attack on an unprotected Advanced Encryption Standard (AES) software program executing on a seven-stage pipelined Reduced Instruction Set Computer (RISC) processor. The use of the microprocessor fault sensitivity model to guide the attack leads to an order of magnitude fewer fault injections compared to a traditional, blind fault injection method. Second, MAFIA can be used to break known software countermeasures against fault injection. We demonstrate this by systematically breaking a collection of state-of-the-art software fault countermeasures. These two examples lead to the key conclusion of this work, namely that software fault attacks become much more harmful and effective when an appropriate microprocessor fault sensitivity model is used. This, in turn, highlights the need for better fault countermeasures for software.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "95", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mera:2017:ATP, author = "Maria Isabel Mera and Jonah Caplan and Seyyed Hasan Mozafari and Brett H. 
Meyer and Peter Milder", title = "Area, Throughput, and Power Trade-Offs for {FPGA}- and {ASIC}-Based Execution Stream Compression", journal = j-TECS, volume = "16", number = "4", pages = "96:1--96:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3063312", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Aug 14 18:53:33 MDT 2017", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/datacompression.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "An emerging trend in safety-critical computer system design is the use of compression --- for example, using cyclic redundancy check (CRC) or Fletcher checksum (FC) --- to reduce the state that must be compared to verify correct redundant execution. We examine the costs and performance of CRC and FC as compression algorithms when implemented in hardware for embedded safety-critical systems. To do so, we have developed parameterizable hardware-generation tools targeting CRC and two novel FC implementations. We evaluate the resulting designs implemented for FPGA and ASIC and analyze their efficiency. While CRC is often best, FC dominates when high throughput is needed.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "96", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Tigori:2017:FMB, author = "Kabland Toussaint Gautier Tigori and Jean-Luc B{\'e}chennec and S{\'e}bastien Faucou and Olivier Henri Roux", title = "Formal Model-Based Synthesis of Application-Specific Static {RTOS}", journal = j-TECS, volume = "16", number = "4", pages = "97:1--97:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3015777", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Aug 14 18:53:33 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In an embedded system, the specialization of the code of the real-time operating system (RTOS) according to the requirements of the application allows one to remove unused services and other sources of dead code from the binary program. The typical specialization process is based on a mix of precompiler macros and build scripts, both of which are known for being sources of errors. In this article, we present a new model-based approach to the design of application-specific RTOS. Starting with finite state models describing the RTOS and the application requirements, the set of blocks in the RTOS code actually used by the application is automatically computed. This set is used to build an application-specific RTOS model. This model is fed into a code generator to produce the source code of an application-specific RTOS. It is also used to carry on model-based validations and verifications, including the formal verification that the specialization process did not introduce unwanted behaviors or suppress expected ones. To demonstrate the feasibility of this approach, it is applied to specialize Trampoline, an open-source implementation of the AUTOSAR OS standard, to an industrial case study from the automotive domain.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. 
Comput. Syst.", articleno = "97", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Santanna:2017:DIS, author = "Francisco Sant'anna and Roberto Ierusalimschy and Noemi Rodriguez and Silvana Rossetto and Adriano Branco", title = "The Design and Implementation of the Synchronous Language {C{\'e}U}", journal = j-TECS, volume = "16", number = "4", pages = "98:1--98:26", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3035544", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Aug 14 18:53:33 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "C{\'e}U is a synchronous language targeting soft real-time systems. It is inspired by Esterel and has a simple semantics with fine-grain control over program execution. C{\'e}U uses an event-triggered notion of time that enables compile-time checks to detect conflicting concurrent statements, resulting in deterministic and concurrency-safe programs. We present the particularities of our design in comparison to Esterel, such as stack-based internal events, concurrency checks, safe integration with C, and first-class timers. We also present two implementation back ends: one aiming for resource efficiency and interoperability with C, and another as a virtual machine that allows remote reprogramming.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "98", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Stilkerich:2017:PGU, author = "Isabella Stilkerich and Clemens Lang and Christoph Erhardt and Christian Bay and Michael Stilkerich", title = "The Perfect Getaway: Using Escape Analysis in Embedded Real-Time Systems", journal = j-TECS, volume = "16", number = "4", pages = "99:1--99:30", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3035542", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Aug 14 18:53:33 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The use of a managed, type-safe language such as Java in real-time and embedded systems offers productivity and, in particular, safety and dependability benefits at a reasonable cost. It has been shown for commodity systems that Escape Analysis (EA) enables a set of useful optimizations, and benefits from the properties of a type-safe language. In this article, we explore the application of escape analysis in KESO [Stilkerich et al. 2012], a Java ahead-of-time compiler targeting embedded real-time systems. We present specific applications of EA for embedded programs that go beyond the widely known stack-allocation and synchronization optimizations such as extended remote-procedure-call (RPC) support for software-isolated applications, automated inference of immutable data, or improved upper space and time bounds for worst-case estimations.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "99", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hassan:2017:PRA, author = "Mohamed Hassan and Hiren Patel and Rodolfo Pellizzoni", title = "{PMC}: a Requirement-Aware {DRAM} Controller for Multicore Mixed Criticality Systems", journal = j-TECS, volume = "16", number = "4", pages = "100:1--100:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3019611", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Aug 14 18:53:33 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We propose a novel approach to schedule memory requests in Mixed Criticality Systems (MCS). This approach supports an arbitrary number of criticality levels by enabling the MCS designer to specify memory requirements per task. It retains locality within large-size requests to satisfy memory requirements of all tasks. To achieve this target, we introduce a compact time-division-multiplexing scheduler, and a framework that constructs optimal schedules to manage requests to off-chip memory. We also present a static analysis that guarantees meeting requirements of all tasks. We compare the proposed controller against state-of-the-art memory controllers using both a case study and synthetic experiments.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "100", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wang:2017:HAT, author = "Tianyi Wang and Soamar Homsi and Linwei Niu and Shaolei Ren and Ou Bai and Gang Quan and Meikang Qiu", title = "Harmonicity-Aware Task Partitioning for Fixed Priority Scheduling of Probabilistic Real-Time Tasks on Multi-Core Platforms", journal = j-TECS, volume = "16", number = "4", pages = "101:1--101:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3064813", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Aug 14 18:53:33 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The uncertainty due to performance variations of IC chips and resource sharing on multi-core platforms have significantly degraded the predictability of real-time systems. Traditional deterministic approaches based on the worst-case assumptions become extremely pessimistic and thus unpractical. In this article, we address the problem of scheduling a set of fixed-priority periodic real-time tasks on multi-core platforms in a probabilistic manner. Specifically, we consider task execution time as a probabilistic distribution and study how to schedule these tasks on multi-core platforms with guaranteed Quality of Service (QoS) requirements in terms of deadline-missing probabilities. Moreover, it is a well-known fact that the relationship among task periods, if exploited appropriately, can significantly improve the processor utilization. To this end, we present a novel approach to partition real-time tasks that can take both task execution time distributions and their period relationships into consideration. From our extensive experiment results, our proposed methods can greatly improve the schedulability of real-time tasks when compared with existing approaches.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "101", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wang:2017:DRM, author = "Yi Wang and Yajun Ha", title = "A {DFA}-Resistant and Masked {PRESENT} with Area Optimization for {RFID} Applications", journal = j-TECS, volume = "16", number = "4", pages = "102:1--102:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3035543", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Aug 14 18:53:33 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Radio-Frequency Identification (RFID) tag-based applications are usually resource constrained and security sensitive. However, only about 2,000 gate equivalents in a tag can be budgeted for implementing security components [27]. This requires not only lightweight cryptographic algorithms such as PRESENT (around 1,000 gate equivalents) but also lightweight protections against modern Side Channel Attacks (SCAs). With this budget, the first-order masking and fault detection are two suitable countermeasures to be developed for PRESENT. However, if both countermeasures are applied without any optimization, it will significantly exceed the given area budget. In this work, we optimize area to include both countermeasures to maximize the security for PRESENT within this RFID area budget. The most area-consuming parts of the proposed design are the masked S-boxes and the inverse masked S-boxes. To optimize the area, we have deduced a computational relationship between these two parts, which enables us to reuse the hardware resource of the masked S-boxes to implement the inverse masked S-boxes. The proposed design takes up only 2,376 gates with UMC 65nm CMOS technology. Compared with the unoptimized design, our implementation reduces the overall area by 28.45\%. 
We have tested the effectiveness of the first-order Differential Power Analysis (DPA) and Differential Fault Analysis (DFA)-resistant countermeasures. Experimental results show that we have enhanced the SCA resistance of our PRESENT implementation.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "102", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Nagar:2017:RCB, author = "Kartik Nagar and Y. N. Srikant", title = "Refining Cache Behavior Prediction Using Cache Miss Paths", journal = j-TECS, volume = "16", number = "4", pages = "103:1--103:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3035541", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Aug 14 18:53:33 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Worst-Case Execution Time (WCET) is an important metric for programs running on real-time systems, and finding precise estimates of a program's WCET is crucial to avoid wastage of hardware resources and to improve the schedulability of task sets. Caches have a major impact on a program's execution time, and accurate estimation of a program's cache behavior can lead to significant reduction in its estimated WCET. The traditional approach to cache analysis generally targets the worst-case cache behavior of individual cache accesses and provides a safe hit-miss classification for every individual access. In this work, we show that these classifications are not sufficient to precisely capture cache behavior, since they apply to individual accesses, and often, more precise predictions can be made about groups of accesses. Further, memory accesses inside loops may show the worst-case behavior only for a subset of the iteration space. 
In order to predict such behavior in a scalable fashion, we use the fact that the cache behavior of an access mostly depends only on the memory accesses made in the immediate vicinity, and hence we analyze a small, fixed-size neighborhood of every access with complete precision and summarize the resulting information in the form of cache miss paths. A variety of analyses are then performed on the cache miss paths to make precise predictions about cache behavior. We also demonstrate precision issues in Abstract Interpretation-based Must and Persistence cache analysis that can be easily solved using cache miss paths. Experimental results over a wide range of benchmarks demonstrate precision improvement in WCET of multipath programs over previous approaches, and we also show how to integrate our approach with other microarchitectural analysis such as pipeline analysis.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "103", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Benerecetti:2017:ASS, author = "Massimo Benerecetti and Marco Faella", title = "Automatic Synthesis of Switching Controllers for Linear Hybrid Systems: Reachability Control", journal = j-TECS, volume = "16", number = "4", pages = "104:1--104:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3047500", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Aug 14 18:53:33 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We consider the problem of computing the controllable region of a Linear Hybrid Automaton with controllable and uncontrollable transitions, w.r.t. a reachability objective. 
We provide an algorithm for the finite-horizon version of the problem, based on computing the set of states that must reach a given non-convex polyhedron while avoiding another one, subject to a polyhedral constraint on the slope of the trajectory. Experimental results are presented, based on an implementation of the proposed algorithm on top of the tool SpaceEx.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "104", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Sandoval:2017:TTS, author = "Nathan Sandoval and Casey Mackin and Sean Whitsitt and Vijay Shankar Gopinath and Sachidanand Mahadevan and Andrew Milakovich and Kyle Merry and Jonathan Sprinkle and Roman Lysecky", title = "Task Transition Scheduling for Data-Adaptable Systems", journal = j-TECS, volume = "16", number = "4", pages = "105:1--105:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3047498", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Aug 14 18:53:33 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Data-adaptable embedded systems operate on a variety of data streams, which requires a large degree of configurability and adaptability to support runtime changes in data stream inputs. Data-adaptable reconfigurable embedded systems, when decomposed into a series of tasks, enable a flexible runtime implementation in which a system can transition the execution of certain tasks between hardware and software while simultaneously continuing to process data during the transition. Efficient runtime scheduling of task transitions is needed to optimize system throughput and latency of the reconfiguration and transition periods. 
In this article, we provide an overview of a runtime framework enabling the efficient transition of tasks between software and hardware in response to changes in system inputs. We further present and analyze several runtime transition scheduling algorithms and highlight the latency and throughput tradeoffs for two data-adaptable systems. To evaluate the task transition selection algorithms, a case study was performed on an adaptable JPEG2000 implementation as well as three other synchronous dataflow systems characterized by transition latency and communication load.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "105", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zheng:2017:RTS, author = "Xi Zheng and Christine Julien and Hongxu Chen and Rodion Podorozhny and Franck Cassez", title = "Real-Time Simulation Support for Runtime Verification of Cyber-Physical Systems", journal = j-TECS, volume = "16", number = "4", pages = "106:1--106:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3063382", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Aug 14 18:53:33 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In Cyber-Physical Systems (CPS), cyber and physical components must work seamlessly in tandem. Runtime verification of CPS is essential yet very difficult, due to deployment environments that are expensive, dangerous, or simply impossible to use for verification tasks. A key enabling factor of runtime verification of CPS is the ability to integrate real-time simulations of portions of the CPS into live running systems. We propose a verification approach that allows CPS application developers to opportunistically leverage real-time simulation to support runtime verification. 
Our approach, termed BraceBind, allows selecting, at runtime, between actual physical processes or simulations of them to support a running CPS application. To build BraceBind, we create a real-time simulation architecture to generate and manage multiple real-time simulation environments based on existing simulation models in a manner that ensures sufficient accuracy for verifying a CPS application. Specifically, BraceBind aims to both improve simulation speed and minimize latency, thereby making it feasible to integrate simulations of physical processes into the running CPS application. BraceBind then integrates this real-time simulation architecture with an existing runtime verification approach that has low computational overhead and high accuracy. This integration uses an aspect-oriented adapter architecture that connects the variables in the cyber portion of the CPS application with either sensors and actuators in the physical world or the automatically generated real-time simulation. Our experimental results show that, with a negligible performance penalty, our approach is both efficient and effective in detecting program errors that are otherwise only detectable in a physical deployment.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "106", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ma:2017:DPE, author = "Kaisheng Ma and Xueqing Li and Huichu Liu and Xiao Sheng and Yiqun Wang and Karthik Swaminathan and Yongpan Liu and Yuan Xie and John Sampson and Vijaykrishnan Narayanan", title = "Dynamic Power and Energy Management for Energy Harvesting Nonvolatile Processor Systems", journal = j-TECS, volume = "16", number = "4", pages = "107:1--107:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3077575", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Aug 14 18:53:33 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Self-powered systems running on scavenged energy will be a key enabler for pervasive computing across the Internet of Things. The variability of input power in energy-harvesting systems limits the effectiveness of static optimizations aimed at maximizing the input-energy-to-computation ratio. We show that the resultant gap between available and exploitable energy is significant, and that energy storage optimizations alone do not significantly close the gap. We characterize these effects on a real, fabricated energy-harvesting system based on a nonvolatile processor. We introduce a unified energy-oriented approach to first optimize the number of backups, by more aggressively using the stored energy available when power failure occurs, and then optimize forward progress via improving the rate of input energy to computation via dynamic voltage and frequency scaling and self-learning techniques. We evaluate combining these schemes and show capture of up to 75.5\% of all input energy toward processor computation, an average of $ 1.54 \times $ increase over the best static ``Forward Progress'' baseline system. 
Notably, our energy-optimizing policy combinations simultaneously improve both the rate of forward progress and the rate of backup events (by up to 60.7\% and 79.2\% for RF power, respectively, and up to 231.2\% and reduced to zero, respectively, for solar power). This contrasts with static frequency optimization approaches in which these two metrics are antagonistic.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "107", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chatterjee:2017:FTD, author = "Navonil Chatterjee and Suraj Paul and Santanu Chattopadhyay", title = "Fault-Tolerant Dynamic Task Mapping and Scheduling for Network-on-Chip-Based Multicore Platform", journal = j-TECS, volume = "16", number = "4", pages = "108:1--108:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3055512", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Aug 14 18:53:33 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In Network-on-Chip (NoC)-based multicore systems, task allocation and scheduling are known to be important problems, as they affect the performance of applications in terms of energy consumption and timing. Advancement of deep submicron technology has made it possible to scale the transistor feature size to the nanometer range, which has enabled multiple processing elements to be integrated onto a single chip. On the flipside, it has made the integrated entities on the chip more susceptible to different faults. Although a significant amount of work has been done in the domain of fault-tolerant mapping and scheduling, existing algorithms either precompute reconfigured mapping solutions at design time while anticipating fault(s) scenarios or adopt a hybrid approach wherein a part of the fault mitigation strategy relies on the design-time solution. 
The complexity of the problem rises further for real-time dynamic systems where new applications can arrive in the multicore platform at any time instant. For real-time systems, the validity of computation depends both on the correctness of results and on temporal constraint satisfaction. This article presents an improved fault-tolerant dynamic solution to the integrated problem of application mapping and scheduling for NoC-based multicore platforms. The developed algorithm provides a unified mapping and scheduling method for real-time systems focusing on meeting application deadlines and minimizing communication energy. A predictive model has been used to determine the failure-prone cores in the system for which a fault-tolerant resource allocation with task redundancy has been performed. By selectively using a task replication policy, the reliability of the application, executing on a given NoC platform, is improved. A detailed evaluation of the performance of the proposed algorithm has been conducted for both real and synthetic applications. When compared with other fault-tolerant algorithms reported in the literature, performance of the proposed algorithm shows an average reduction of 56.95\% in task re-execution time overhead and an average improvement of 31\% in communication energy. Further, for time-constrained tasks, deadline satisfaction has also been achieved for most of the test cases by the developed algorithm, whereas the techniques reported in the literature failed to meet deadline in about 45\% test cases.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "108", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ahir:2017:LAR, author = "Prashant Ahir and Mehran Mozaffari-Kermani and Reza Azarderakhsh", title = "Lightweight Architectures for Reliable and Fault Detection {Simon} and {Speck} Cryptographic Algorithms on {FPGA}", journal = j-TECS, volume = "16", number = "4", pages = "109:1--109:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3055514", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Aug 14 18:53:33 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The widespread use of sensitive and constrained applications necessitates lightweight (low-power and low-area) algorithms developed for constrained nano-devices. However, nearly all of such algorithms are optimized for platform-based performance and may not be useful for diverse and flexible applications. The National Security Agency (NSA) has proposed two relatively recent families of lightweight ciphers, that is, Simon and Speck, designed as efficient ciphers on both hardware and software platforms. This article proposes concurrent error detection schemes to provide reliable architectures for these two families of lightweight block ciphers. The research work on analyzing the reliability of these algorithms and providing fault diagnosis approaches has not been undertaken to date to the best of our knowledge. The main aim of the proposed reliable architectures is to provide high error coverage while maintaining acceptable area and power consumption overheads. To achieve this, we propose a variant of recomputing with encoded operands. These low-complexity schemes are suited for low-resource applications such as sensitive, constrained implantable and wearable medical devices. 
We perform fault simulations for the proposed architectures by developing a fault model framework. The architectures are simulated and analyzed on recent field-programmable gate array (FPGA) platforms, and it is shown that the proposed schemes provide high error coverage. The proposed low-complexity concurrent error detection schemes are a step forward toward more reliable architectures for Simon and Speck algorithms in lightweight, secure applications.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "109", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Pan:2017:EMW, author = "Chen Pan and Mimi Xie and Chengmo Yang and Yiran Chen and Jingtong Hu", title = "Exploiting Multiple Write Modes of Nonvolatile Main Memory in Embedded Systems", journal = j-TECS, volume = "16", number = "4", pages = "110:1--110:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3063130", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Aug 14 18:53:33 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Existing Nonvolatile Memories (NVMs) have many attractive features to be the main memory of embedded systems. These features include low power, high density, and better scalability. Recently, Multilevel Cell (MLC) NVM has gained more and more popularity as it can provide a higher density than the traditional Single-Level Cell (SLC) NVM. However, there are also drawbacks in MLC NVM, namely, limited write endurance and expensive write operation. These two drawbacks have to be overcome before MLC NVM can be practically adopted as the main memory. In MLC Nonvolatile Main Memory (NVMM), two different types of write operations with very diverse data retention times are allowed. The first type maintains data for years but takes a longer time to write and is detrimental to the endurance.
The second type maintains data for a short period but takes a shorter time to write. By observing that much of the data written to main memory is temporary and does not need to last long during the execution of a program, in this article, we propose novel task scheduling and write operation selection algorithms to improve MLC NVMM endurance and program efficiency. An Integer Linear Programming (ILP) formulation is first proposed to obtain optimal results. Since ILP takes exponential time to solve, we also propose the Multiwrite Mode-Aware Scheduling (MMAS) algorithm to achieve a near-optimal solution in polynomial time. Additionally, the Dynamical Memory Block Screening (DMS) algorithm is proposed to achieve wear leveling. The experimental results demonstrate that the proposed techniques can greatly improve the lifetime of the MLC NVMM as well as the efficiency of the program.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "110", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Li:2017:TPR, author = "Yu Li and Albert M. K. Cheng", title = "Toward a Practical Regularity-based Model: The Impact of Evenly Distributed Temporal Resource Partitions", journal = j-TECS, volume = "16", number = "4", pages = "111:1--111:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3092945", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Aug 14 18:53:33 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Most Hierarchical Real-time Scheduling (HiRTS) techniques have focused on temporal resource partitions in which time units are periodically distributed. Although such periodic partitions could provide great flexibility for the resource-level scheduling, engineers face significant obstacles when trying to determine the schedulability of real-time tasks running on them. 
The main reason is that periodic partitions fail to effectively bound the difference between the ideal and the actual resource allocation. To solve this problem, some researchers introduced the Regular Partition, a type of temporal resource partition that is almost evenly distributed. Recent research has shown that it achieves maximal transparency for task scheduling --- some classical real-time scheduling problems on a regular partition can be easily transformed into equivalent problems on a dedicated single resource. However, the resource partitioning problem for regular partitions is much more complicated than the one for periodic partitions. Based on a practical two-layer HiRTS platform, this article introduces MulZ (Multiple Z-sequences), which is the first to solve this problem with a partitioned scheduling strategy. By using a more complicated approximation methodology, our experimental results show that MulZ outperforms the current best global scheduling algorithm on this problem. After that, it compares the overall performance of the periodic partition and the regular partition. We conclude that the regular partition is a better choice for the integration of real-time applications.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "111", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kim:2017:WAF, author = "Yooseong Kim and David Broman and Aviral Shrivastava", title = "{WCET}-Aware Function-Level Dynamic Code Management on Scratchpad Memory", journal = j-TECS, volume = "16", number = "4", pages = "112:1--112:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3063383", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Aug 14 18:53:33 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Scratchpad memory (SPM) is a promising on-chip memory choice in real-time and cyber-physical systems where timing is of the utmost importance. SPM has time-predictable characteristics since its data movement between the SPM and the main memory is entirely managed by software. One way of such management is dynamic management. In dynamic management of instruction SPMs, code blocks are dynamically copied from the main memory to the SPM at runtime by executing direct memory access (DMA) instructions. Code management techniques try to minimize the overhead of DMA operations by finding an allocation scheme that leads to efficient utilization. In this article, we present three function-level code management techniques. These techniques perform allocation at the granularity of functions, with the objective of minimizing the impact of DMA overhead to the worst-case execution time (WCET) of a given program. The first technique finds an optimal mapping of each function to a region using integer linear programming (ILP), whereas the second technique is a polynomial-time heuristic that is suboptimal. The third technique maps functions directly to SPM addresses, not using regions, which can further reduce the WCET. Based on ILP, it can also find an optimal mapping. 
We evaluate our techniques using the M{\"a}lardalen WCET suite, MiBench suite, and proprietary automotive applications from industry. The results show that our techniques can significantly reduce the WCET estimates compared to caches with the state-of-the-art cache analysis.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "112", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Liu:2017:PNM, author = "Guanjun Liu and Mengchu Zhou and Changjun Jiang", title = "{Petri} Net Models and Collaborativeness for Parallel Processes with Resource Sharing and Message Passing", journal = j-TECS, volume = "16", number = "4", pages = "113:1--113:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2810001", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Aug 14 18:53:33 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Petri nets are widely used to model and analyse concurrent systems. There exist two distinct classes of Petri nets that focus on different features of concurrent systems. The first one features multiple parallel processes sharing a group of common resources but not interacting/collaborating with each other. The second one allows multiple parallel processes to interact/collaborate with each other via message exchange but does not share any common resources. However, in many distributed environments, multiple processes both interact/collaborate with each other and share some common resources. To model and analyse such systems, this article defines a new class of Petri nets called Parallel Process Nets (P$^2$ Ns) that may be viewed as a generalization of the two mentioned above. We propose collaborativeness and close collaborativeness for P$^2$ Ns. 
The former guarantees that a modelled system is both deadlock-free and livelock-free, and the latter guarantees that it is deadlock-free, livelock-free, and starvation-free. These concepts and ideas are illustrated through some classical examples such as Producer-Consumer Problem and Dining Philosophers Problem. Algorithms are developed to decide them. At last, P$^2$ Ns are applied to the modelling and analysis of two real systems: hospital information system and elevator scheduling system.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "113", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ciszewski:2017:EAC, author = "Michal Ciszewski and Konrad Iwanicki", title = "Efficient Automated Code Partitioning for Microcontrollers with Switchable Memory Banks", journal = j-TECS, volume = "16", number = "4", pages = "114:1--114:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3055511", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Aug 14 18:53:33 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Switching active memory banks at runtime allows a processor with a narrow address bus to access memory that exceeds ranges normally addressable via the bus. Switching code memory banks is regaining interest in microcontrollers for the Internet of Things (IoT), which have to run continuously growing software, while at the same time consuming ultra-small amounts of energy. To make use of bank switching, such software must be partitioned among the available banks and augmented with bank-switching instructions. In contrast to the augmenting, which is done automatically by a compiler, today the partitioning is normally done manually by programmers.
However, since IoT software is cross-compiled on much more powerful machines than its target microcontrollers, it becomes possible to partition it automatically during compilation. In this article, we thus study the problem of partitioning program code among banks such that the resulting runtime performance of the program is maximized. We prove that the problem is NP-hard and propose a heuristic algorithm with a low complexity, so it enables fast compilation and hence interactive software development. The algorithm decomposes the problem into three subproblems and introduces a heuristic for each of them: (1) which pieces of code to partition, (2) which of them to assign to permanently mapped banks, and (3) how to divide the remaining ones among switchable banks. We integrate the algorithm, together with earlier ones, in an open-source compiler and test the resulting solution on synthetic as well as actual commercial IoT software bases, thereby demonstrating its advantages and drawbacks. In particular, the results show that the performance of partitions produced by our algorithm comes close to that of partitions created manually by programmers with expert knowledge on the partitioned code.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "114", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Liang:2017:EKM, author = "Yun Liang and Xiuhong Li", title = "Efficient Kernel Management on {GPUs}", journal = j-TECS, volume = "16", number = "4", pages = "115:1--115:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3070710", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Aug 14 18:53:33 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Graphics Processing Units (GPUs) have been widely adopted as accelerators for compute-intensive applications due to its tremendous computational power and high memory bandwidth. As the complexity of applications continues to grow, each new generation of GPUs has been equipped with advanced architectural features and more resources to sustain its performance acceleration capability. Recent GPUs have been featured with concurrent kernel execution, which is designed to improve the resource utilization by executing multiple kernels simultaneously. However, it is still a challenge to find a way to manage the resources on GPUs for concurrent kernel execution. Prior works only achieve limited performance improvement as they do not optimize the thread-level parallelism (TLP) and model the resource contention for the concurrently executing kernels. In this article, we design an efficient kernel management framework that optimizes the performance for concurrent kernel execution on GPUs. Our kernel management framework contains two key components: TLP modulation and cache bypassing. The TLP modulation is employed to adjust the TLP for the concurrently executing kernels. It consists of three parts: kernel categorization, static TLP modulation, and dynamic TLP modulation. 
The cache bypassing is proposed to mitigate the cache contention by only allowing a subset of a kernel's blocks to access the L1 data cache. Experiments indicate that our framework can improve the performance by $ 1.51 \times $ on average (energy-efficiency by $ 1.39 \times $ on average), compared with the default concurrent kernel execution framework.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "115", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Sun:2017:ESD, author = "Yuliang Sun and Lanjun Wang and Chen Wang and Yu Wang", title = "Exploiting Stable Data Dependency in Stream Processing Acceleration on {FPGAs}", journal = j-TECS, volume = "16", number = "4", pages = "116:1--116:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3092950", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Aug 14 18:53:33 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "With the unique feature of fine-grained parallelism, field-programmable gate arrays (FPGAs) show great potential for streaming algorithm acceleration. However, the lack of a design framework, restrictions on FPGAs, and ineffective tools impede the utilization of FPGAs in practice. In this study, we provide a design paradigm to support streaming algorithm acceleration on FPGAs. We first propose an abstract model to describe streaming algorithms with homogeneous sub-functions (HSF) and stable data dependency (SDD), which we call the HSF-SDD model. Using this model, we then develop an FPGA framework, PE-Ring, that has the advantages of (1) fully exploiting algorithm parallelism to achieve high performance, (2) leveraging block RAM to serve large scale parameters, and (3) enabling flexible parameter adjustments. 
Based on the proposed model and framework, we finally implement a specific converter to generate the register-transfer level representation of the PE-Ring. Experimental results show that our method outperforms ordinary FPGA design tools by one to two orders of magnitude. Experiments also demonstrate the scalability of the PE-Ring.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "116", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Liu:2017:HPI, author = "Zhe Liu and Thomas P{\"o}ppelmann and Tobias Oder and Hwajeong Seo and Sujoy Sinha Roy and Tim G{\"u}neysu and Johann Gro{\ss}sch{\"a}dl and Howon Kim and Ingrid Verbauwhede", title = "High-Performance Ideal Lattice-Based Cryptography on $8$-Bit {AVR} Microcontrollers", journal = j-TECS, volume = "16", number = "4", pages = "117:1--117:??", month = aug, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3092951", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Aug 14 18:53:33 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Over recent years lattice-based cryptography has received much attention due to versatile average-case problems like Ring-LWE or Ring-SIS that appear to be intractable by quantum computers. In this work, we evaluate and compare implementations of Ring-LWE encryption and the bimodal lattice signature scheme (BLISS) on an 8-bit Atmel ATxmega128 microcontroller. Our implementation of Ring-LWE encryption provides comprehensive protection against timing side-channels and takes 24.9ms for encryption and 6.7ms for decryption. To compute a BLISS signature, our software takes 317ms and 86ms for verification. 
These results underline the feasibility of lattice-based cryptography on constrained devices.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "117", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Malik:2017:MCH, author = "Avinash Malik and Partha S. Roop and Sidharta Andalam and Mark Trew and Michael Mendler", title = "Modular Compilation of Hybrid Systems for Emulation and Large Scale Simulation", journal = j-TECS, volume = "16", number = "5s", pages = "118:1--118:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126536", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Hybrid systems combine discrete controllers with adjoining physical processes. While many approaches exist for simulating hybrid systems, there are few approaches for their emulation, especially when the actual physical plant is not available. This paper develops the first formal framework for emulation along with a new compiler that enables large-scale (1000+ components) simulation. We propose a formal model called Synchronous Emulation Automaton (SEA) specifically for modular compilation and parallel execution. SEA combines Linear Time Invariant (LTI) systems with discrete mode switches and has the following semantic differences with Hybrid Automata: (1) the Ordinary Differential Equations are solved analytically and the solutions are sampled at the Worst-Case Reaction Time of the model and (2) we develop a new composition semantics, which allows individual SEAs to execute in parallel with each other. The proposed semantics eliminates: (a) the need for dynamic numerical solvers, and (b) the Zeno-phenomenon by construction. 
Experimental results show that process models designed using our tool (Piha) give a 3.6 times execution speedup over Simulink\reg, and up to 26 times speedup on manycore architectures.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "118", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Blindell:2017:CPU, author = "Gabriel Hjort Blindell and Mats Carlsson and Roberto Casta{\~n}eda Lozano and Christian Schulte", title = "Complete and Practical Universal Instruction Selection", journal = j-TECS, volume = "16", number = "5s", pages = "119:1--119:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126528", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In code generation, instruction selection chooses processor instructions to implement a program under compilation where code quality crucially depends on the choice of instructions. Using methods from combinatorial optimization, this paper proposes an expressive model that integrates global instruction selection with global code motion. The model introduces (1) handling of memory computations and function calls, (2) a method for inserting additional jump instructions where necessary, (3) a dependency-based technique to ensure correct combinations of instructions, (4) value reuse to improve code quality, and (5) an objective function that reduces compilation time and increases scalability by exploiting bounding techniques. The approach is demonstrated to be complete and practical, competitive with LLVM, and potentially optimal (w.r.t. the model) for medium-sized functions. 
The results show that combinatorial optimization for instruction selection is well-suited to exploit the potential of modern processors in embedded systems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "119", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Su:2017:EWA, author = "Xuesong Su and Hui Wu and Jingling Xue", title = "An Efficient {WCET}-Aware Instruction Scheduling and Register Allocation Approach for Clustered {VLIW} Processors", journal = j-TECS, volume = "16", number = "5s", pages = "120:1--120:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126524", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In real-time embedded system design, one major goal is to construct a feasible schedule. Whether a feasible schedule exists depends on the Worst-Case Execution Time (WCET) of each task. Consequently, it is important to minimize the WCET of each task. We investigate the problem of instruction scheduling and register allocation for a program executed on a clustered Very Long Instruction Word (VLIW) processor such that the WCET of the program is minimized, and propose a novel, unified instruction scheduling and register allocation heuristic approach. Our heuristic approach is underpinned by a set of novel techniques, including spanning graph-based WCET-aware live range splitting, WCET-aware dynamic register pressure control, WCET-aware basic block prioritization for performing integrated instruction scheduling and register allocation, and WCET-aware spill code handling. We have implemented our approach in Trimaran 4.0, and compared it with the state-of-the-art approach by using a set of 20 benchmarks. 
The experimental results show that our approach achieves the maximum WCET improvement of 29.61\% and the average WCET improvement of 10.23\%, respectively.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "120", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Micolet:2017:SDP, author = "Paul-Jules Micolet and Aaron Smith and Christophe Dubach", title = "A Study of Dynamic Phase Adaptation Using a Dynamic Multicore Processor", journal = j-TECS, volume = "16", number = "5s", pages = "121:1--121:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126523", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Heterogeneous processors such as ARM's big.LITTLE have become popular for embedded systems. They offer a choice between running workloads on a high performance core or a low-energy core leading to increased energy efficiency. However, the core configurations are fixed at design time which offers a limited amount of adaptation. Dynamic Multicore Processors (DMPs) bridge the gap between homogeneous and fully reconfigurable systems. Cores can fuse dynamically to adapt the computational resources to the needs of different workloads. There exists multiple examples of DMPs in the literature, yet the focus has mainly been on static partitioning. This paper conducts the first thorough study of the potential for dynamic reconfiguration of DMPs at runtime. We study how performance varies with static partitioning and what software optimizations are required to achieve high performance. 
We show that energy consumption is reduced considerably when adapting the number of cores to program phases, and introduce a simple online model which predicts the optimal number of cores to use to minimize energy consumption while maintaining high performance. Using the San Diego Vision Benchmark Suite as a use case, the dynamic scheme leads to $ \approx $40\% energy savings on average without decreasing performance.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "121", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Trub:2017:IPM, author = "Roman Tr{\"u}b and Georgia Giannopoulou and Andreas Tretter and Lothar Thiele", title = "Implementation of Partitioned Mixed-Criticality Scheduling on a Multi-Core Platform", journal = j-TECS, volume = "16", number = "5s", pages = "122:1--122:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126533", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Recent industrial trends favor the adoption of multi-core architectures for mixed-criticality applications. Although several mixed-criticality multi-core scheduling approaches have been proposed, currently there are few implementations on hardware that demonstrate efficient resource utilization and the ability to bound interference on shared resources. To address this necessity, we develop a mixed-criticality runtime environment on the Kalray MPPA-256 Andey many-core platform. The runtime environment implements a scheduling policy based on adaptive temporal partitioning. We develop models, methods and implementation principles to implement the necessary scheduling primitives, to achieve high platform utilization and to perform a compositional worst-case execution time analysis. 
The bounds account for scheduling overheads and for the inter-task interference on the platform's shared memory. Using realistic benchmarks from avionics and signal processing, we validate the correctness and tightness of the bounds and demonstrate a high platform utilization.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "122", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Gupta:2017:DDP, author = "Ujjwal Gupta and Chetan Arvind Patil and Ganapati Bhat and Prabhat Mishra and Umit Y. Ogras", title = "{DyPO}: Dynamic {Pareto}-Optimal Configuration Selection for Heterogeneous {MpSoCs}", journal = j-TECS, volume = "16", number = "5s", pages = "123:1--123:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126530", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Modern multiprocessor systems-on-chip (MpSoCs) offer tremendous power and performance optimization opportunities by tuning thousands of potential voltage, frequency and core configurations. As the workload phases change at runtime, different configurations may become optimal with respect to power, performance or other metrics. Identifying the optimal configuration at runtime is infeasible due to the large number of workloads and configurations. This paper proposes a novel methodology that can find the Pareto-optimal configurations at runtime as a function of the workload. To achieve this, we perform an extensive offline characterization to find classifiers that map performance counters to optimal configurations. Then, we use these classifiers and performance counters at runtime to choose Pareto-optimal configurations. 
We evaluate the proposed methodology by maximizing the performance per watt for 18 single- and multi-threaded applications. Our experiments demonstrate an average increase of 93\%, 81\% and 6\% in performance per watt compared to the interactive, on demand and powersave governors, respectively.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "123", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Naresh:2017:CCC, author = "Vignyan Reddy Kothinti Naresh and Dibakar Gope and Mikko H. Lipasti", title = "The {CURE}: Cluster Communication Using Registers", journal = j-TECS, volume = "16", number = "5s", pages = "124:1--124:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126527", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "VLIW processors typically deliver high performance on limited budget making them ideal for a variety of communication and signal processing solutions. These processors typically need large multi-ported register files that can have side effects of increased cycle time and high power consumption. The access delay and energy of these register files can also become prohibitive when increasing the register count or the access ports, thus limiting the overall performance of the processor. Most prior art circumvent this problem by using multiple clusters with private register files, to lower the access delay and reduce energy consumption. However, clustering artifacts, like increased inter-cluster communication operations and spill-recovery code, result in a performance penalty. This paper proposes CURE --- a novel technique to considerably reduce the negative effects of clustering.
CURE augments the ISA to expose the communication registers to the compilers to increase availability of architectural register state to all functional units. The inter-cluster communication operations are integrated into regular ALU and memory operations to improve instruction encoding efficiency. We also propose a new code scheduling heuristic to handle the ISA changes, and to realize the improvements in processor's performance and energy consumption. Our quantitative analysis estimates that CURE, when compared to the baseline 8-issue uni-cluster processor, boosts average performance by 61\% while reducing the average register dynamic energy by 77\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "124", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Josipovic:2017:OLS, author = "Lana Josipovic and Philip Brisk and Paolo Ienne", title = "An Out-of-Order Load-Store Queue for Spatial Computing", journal = j-TECS, volume = "16", number = "5s", pages = "125:1--125:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126525", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The efficiency of spatial computing depends on the ability to achieve maximal parallelism. This necessitates memory interfaces that can correctly handle memory accesses that arrive in arbitrary order while still respecting data dependencies and ensuring appropriate ordering for semantic correctness.
However, a typical memory interface for out-of-order processors (i.e., a load-store queue) cannot immediately meet these requirements: a different allocation policy is needed to achieve out-of-order execution in spatial systems that naturally omit the notion of sequential program order, a fundamental piece of information for correct execution. We show a novel and practical way to organize the allocation for an out-of-order load-store queue for spatial computing. The main idea is to dynamically allocate groups of memory accesses (depending on the dynamic behavior of the application), where the access order within the group is statically predetermined (for instance by a high-level synthesis tool). We detail the construction of our load-store queue and demonstrate on a few practical cases its advantages over standard accelerator-memory interfaces.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "125", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Crites:2017:DCE, author = "Brian Crites and Karen Kong and Philip Brisk", title = "Diagonal Component Expansion for Flow-Layer Placement of Flow-Based Microfluidic Biochips", journal = j-TECS, volume = "16", number = "5s", pages = "126:1--126:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126529", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Continuous flow-based microfluidic devices have seen a huge increase in interest because of their ability to automate and miniaturize biochemistry and biological processes, as well as their promise of creating a programmable platform for chemical and biological experimentation. 
The major hurdle in the adoption of these types of devices is in the design, which is largely done by hand using tools such as AutoCAD or SolidWorks, which require immense domain knowledge and are hard to scale. This paper investigates the problem of automated physical design for continuous flow-based microfluidic very large scale integration (mVLSI) biochips, starting from a netlist specification of the flow layer. After an initial planar graph embedding, vertices in the netlist are expanded into two-dimensional components, followed by fluid channel routing. A new heuristic, DIagonal Component Expansion (DICE) is introduced for the component expansion step. Compared to a baseline expansion method, DICE improves area utilization by a factor of 8.90x and reduces average fluid routing channel length by 47.4\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "126", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Elfar:2017:SER, author = "Mahmoud Elfar and Zhanwei Zhong and Zipeng Li and Krishnendu Chakrabarty and Miroslav Pajic", title = "Synthesis of Error-Recovery Protocols for Micro-Electrode-Dot-Array Digital Microfluidic Biochips", journal = j-TECS, volume = "16", number = "5s", pages = "127:1--127:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126538", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "A digital microfluidic biochip (DMFB) is an attractive technology platform for various biomedical applications. However, a conventional DMFB is limited by: (i) the number of electrical connections that can be practically realized, (ii) constraints on droplet size and volume, and (iii) the need for special fabrication processes and the associated reliability/yield concerns. 
To overcome the above challenges, DMFBs based on a micro-electrode-dot-array (MEDA) architecture have been proposed and fabricated recently. Error recovery is of key interest for MEDA biochips due to the need for system reliability. Errors are likely to occur during droplet manipulation due to defects, chip degradation, and the uncertainty inherent in biochemical experiments. In this paper, we first formalize error-recovery objectives, and then synthesize optimal error-recovery protocols using a model based on Stochastic Multiplayer Games (SMGs). We also present a global error-recovery technique that can update the schedule of fluidic operations in an adaptive manner. Using three representative real-life bioassays, we show that the proposed approach can effectively reduce the bioassay completion time and increase the probability of success for error recovery.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "127", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Gottscho:2017:LCM, author = "Mark Gottscho and Irina Alam and Clayton Schoeny and Lara Dolecek and Puneet Gupta", title = "Low-Cost Memory Fault Tolerance for {IoT} Devices", journal = j-TECS, volume = "16", number = "5s", pages = "128:1--128:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126534", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "IoT devices need reliable hardware at low cost. It is challenging to efficiently cope with both hard and soft faults in embedded scratchpad memories. To address this problem, we propose a two-step approach: FaultLink and Software-Defined Error-Localizing Codes (SDELC). 
FaultLink avoids hard faults found during testing by generating a custom-tailored application binary image for each individual chip. During software deployment-time, FaultLink optimally packs small sections of program code and data into fault-free segments of the memory address space and generates a custom linker script for a lazy-linking procedure. During run-time, SDELC deals with unpredictable soft faults via novel and inexpensive Ultra-Lightweight Error-Localizing Codes (UL-ELCs). These require fewer parity bits than single-error-correcting Hamming codes. Yet our UL-ELCs are more powerful than basic single-error-detecting parity: they localize single-bit errors to a specific chunk of a codeword. SDELC then heuristically recovers from these localized errors using a small embedded C library that exploits observable side information (SI) about the application's memory contents. SI can be in the form of redundant data (value locality), legal/illegal instructions, etc. Our combined FaultLink+SDELC approach improves min-VDD by up to 440 mV and correctly recovers from up to 90\% (70\%) of random single-bit soft faults in data (instructions) with just three parity bits per 32-bit word.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "128", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Yantir:2017:AMM, author = "Hasan Erdem Yantir and Ahmed M. Eltawil and Fadi J. 
Kurdahi", title = "Approximate Memristive In-memory Computing", journal = j-TECS, volume = "16", number = "5s", pages = "129:1--129:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126526", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The bottleneck between the processing elements and memory is the biggest issue contributing to the scalability problem in computing. In-memory computation is an alternative approach that combines memory and processor in the same location, and eliminates the potential memory bottlenecks. Associative processors are a promising candidate for in-memory computation, however the existing implementations have been deemed too costly and power hungry. Approximate computing is another promising approach for energy-efficient digital system designs where it sacrifices the accuracy for the sake of energy reduction and speedup in error-resilient applications. In this study, approximate in-memory computing is introduced in memristive associative processors. Two approximate computing methodologies are proposed; bit trimming and memristance scaling. Results show that the proposed methods not only reduce energy consumption of in-memory parallel computing but also improve their performance. As compared to other existing approximate computing methodologies on different architectures (e.g., CPU, GPU, and ASIC), approximate memristive in-memory computing exhibits better results in terms of energy reduction (up to 80x) and speedup (up to 20x) on a variety of benchmarks from different domains when quality degradation is limited to 10\% and it confirms that memristive associative processors provide a highly-promising platform for approximate computing.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "129", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Raha:2017:QIA, author = "Arnab Raha and Vijay Raghunathan", title = "{qLUT}: Input-Aware Quantized Table Lookup for Energy-Efficient Approximate Accelerators", journal = j-TECS, volume = "16", number = "5s", pages = "130:1--130:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126531", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Approximate computing has emerged as a popular design paradigm for optimizing the performance and energy consumption of error-resilient applications in domains such as machine learning, graphics, data analytics, etc. Numerous techniques for approximate computing have been proposed at different layers of the system stack, from circuits to architecture to software. In this work, we propose a new technique, called quantized table lookup, for approximating the meta-functions used in the core computational kernels of error-resilient applications. In contrast to prior work that directly approximates the functionality of the meta-functions, the proposed technique instead approximates the input data to the meta-functions by reducing/quantizing them to a much smaller set of values that we call quantized inputs. The small number of quantized inputs enables us to completely replace the energy-intensive arithmetic units in the meta-function with small and energy-efficient lookup tables (called quantized lookup tables or qLUT) that contain precomputed output values corresponding to the quantized inputs. The proposed approximation technique is not only highly generic, but also inherently quality-configurable and input-aware. 
Quality-configurability and input-awareness are achieved by modulating the size of the qLUT as well as selecting the values of the quantized inputs judiciously based on the statistics of the original input data. To evaluate the proposed technique, we have implemented the dominant meta-functions of nine error-resilient application benchmarks as quantized table lookup based hardware accelerators using 45nm technology. Experimental results demonstrate average energy savings of 46\% at the application-level for minimal ($<$ 1\%) loss in output quality.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "130", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Egilmez:2017:UAF, author = "Begum Egilmez and Matthew Schuchhardt and Gokhan Memik and Raid Ayoub and Niranjan Soundararajan and Michael Kishinevsky", title = "User-aware Frame Rate Management in {Android Smartphones}", journal = j-TECS, volume = "16", number = "5s", pages = "131:1--131:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126539", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Frame rate has a direct impact on the energy consumption of smartphones: the higher the frame rate, the higher the power consumption. Hence, reducing display refreshes will reduce the power consumption. However, it is risky to manipulate frame rate drastically as it can deteriorate user satisfaction with the device. In this work, we introduce a screen management system that controls the frame rate on smartphone displays based on a model that detects user dissatisfaction due to display refreshes. 
This approach is based on understanding when higher frame rates are necessary, and providing lower frame rates --- thus, saving power --- if the lower rate is predicted not to cause user dissatisfaction. According to the results of our first user survey with 20 participants, individuals show highly varying requirements: while some users require high frame rates for the highest satisfaction, others are equally satisfied with lower frame rates. Based on this observation, we develop a system that predicts user dissatisfaction on the runtime and either increases or decreases the maximum frame rate setting. For user dissatisfaction predictions, we have compared two different approaches: (1) static model, which uses dissatisfaction characteristics of a fixed group of people, and (2) user-specific model, which is learning only from the specific user. Our second set of experiments with 20 participants shows that users report 32\% less dissatisfaction and 4\% more dissatisfaction than the default Android system with user-specific and static systems, respectively. These experiments also show that, compared to the default scheme, our mechanisms reduce the power consumption of the phone by 7.2\% and 1.8\% on average with the user-specific and static models, respectively.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "131", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Yan:2017:FFI, author = "Hao Yan and Lei Jiang and Lide Duan and Wei-Ming Lin and Eugene John", title = "{FlowPaP} and {FlowReR}: Improving Energy Efficiency and Performance for {STT-MRAM}-Based Handheld Devices under Read Disturbance", journal = j-TECS, volume = "16", number = "5s", pages = "132:1--132:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126532", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Handheld devices, such as smartphones and tablets, currently dominate the semiconductor market. The memory access patterns of CPU and IP cores are dramatically different in a handheld device, making the main memory a critical bottleneck of the entire system. As a result, non-volatile memories, such as spin transfer torque magnetoresistive random-access memory (STT-MRAM), are emerging as a replacement for the existing DRAM-based main memory, achieving a wide variety of advantages. However, replacing DRAM with STT-MRAM also results in new design challenges including read disturbance. A simple read-and-restore scheme preserves data integrity under read disturbance, but incurs significant performance and energy overheads. Consequently, by utilizing unique characteristics of mobile applications, we propose FlowPaP, a flow pattern prediction scheme to dynamically predict the write-to-last-read distances for data frames running on a handheld device. FlowPaP identifies and removes unnecessary memory restores originally required for preventing read disturbance, significantly improving energy efficiency and performance for STT-MRAM-based handheld devices. 
In addition, we propose a flow-based data retention time reduction scheme named FlowReR to further lower energy consumption of STT-MRAM at the expense of reducing its data retention time. FlowReR imposes a second step that marginally trades off the already improved energy efficiency for performance improvements. Experimental results show that, compared to the original read-and-restore scheme, the application of FlowPaP and FlowReR together can simultaneously improve energy efficiency by 34\% and performance by 17\% for a set of commonly used Android applications.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "132", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Rai:2017:UCG, author = "Siddharth Rai and Mainak Chaudhuri", title = "Using Criticality of {GPU} Accesses in Memory Management for {CPU--GPU} Heterogeneous Multi-Core Processors", journal = j-TECS, volume = "16", number = "5s", pages = "133:1--133:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126540", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Heterogeneous chip-multiprocessors with CPU and GPU integrated on the same die allow sharing of critical memory system resources among the CPU and GPU applications. Such architectures give rise to challenging resource scheduling problems. In this paper, we explore memory access scheduling algorithms driven by criticality of GPU accesses in such systems. Different GPU access streams originate from different parts of the GPU rendering pipeline, which behaves very differently from the typical CPU pipeline requiring new techniques for GPU access criticality estimation. We propose a novel queuing network model to estimate the performance-criticality of the GPU access streams. 
If a GPU application performs below the quality of service requirement (e.g., frame rate in 3D scene rendering), the memory access scheduler uses the estimated criticality information to accelerate the critical GPU accesses. Detailed simulations done on a heterogeneous chip-multiprocessor model with one GPU and four CPU cores running heterogeneous mixes of DirectX, OpenGL, and CPU applications show that our proposal improves the GPU performance by 15\% on average without degrading the CPU performance much. Extensions proposed for the mixes containing GPGPU applications, which do not have any quality of service requirement, improve the performance by 7\% on average for these mixes.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "133", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kang:2017:RLA, author = "Wonkyung Kang and Dongkun Shin and Sungjoo Yoo", title = "Reinforcement Learning-Assisted Garbage Collection to Mitigate Long-Tail Latency in {SSD}", journal = j-TECS, volume = "16", number = "5s", pages = "134:1--134:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126537", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "NAND flash memory is widely used in various systems, ranging from real-time embedded systems to enterprise server systems. Because the flash memory has erase-before-write characteristics, we need flash-memory management methods, i.e., address translation and garbage collection. In particular, garbage collection (GC) incurs long-tail latency, e.g., 100 times higher latency than the average latency at the 99$^{th}$ percentile. Thus, real-time and quality-critical systems fail to meet the given requirements such as deadline and QoS constraints. 
In this study, we propose a novel method of GC based on reinforcement learning. The objective is to reduce the long-tail latency by exploiting the idle time in the storage system. To improve the efficiency of the reinforcement learning-assisted GC scheme, we present new optimization methods that exploit fine-grained GC to further reduce the long-tail latency. The experimental results with real workloads show that our technique significantly reduces the long-tail latency by 29--36\% at the 99.99$^{th}$ percentile compared to state-of-the-art schemes.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "134", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Tretter:2017:MAC, author = "Andreas Tretter and Georgia Giannopoulou and Matthias Baer and Lothar Thiele", title = "Minimising Access Conflicts on Shared Multi-Bank Memory", journal = j-TECS, volume = "16", number = "5s", pages = "135:1--135:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126535", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "A common multi-core pattern consists of processors communicating through shared, multi-banked on-chip memory. Two approaches exist: Interleaved address mapping, which spreads consecutive data over all banks, and contiguous address mapping, which stores consecutive data on a single bank. In this work, we compare both approaches on the Kalray MPPA-256 platform. For contiguous mapping, we propose an algorithm, based on graph colouring techniques, to automatically perform the assignment of data blocks to memory banks with the goal of minimising access collisions and delays. 
Experiments with representative, parallel real-world benchmarks show that 69\% of the tested configurations, when optimised for contiguous mapping by our algorithm, run up to 86\% faster on average than with interleaved mapping.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "135", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Riazi:2017:CSC, author = "M. Sadegh Riazi and Mohammad Samragh and Farinaz Koushanfar", title = "{CAMsure}: Secure Content-Addressable Memory for Approximate Search", journal = j-TECS, volume = "16", number = "5s", pages = "136:1--136:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126547", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/hash.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We introduce CAMsure, the first realization of secure Content Addressable Memory (CAM) in the context of approximate search using near-neighbor algorithms. CAMsure provides a lightweight solution for practical secure (approximate) search with a minimal drop in the accuracy of the search results. CAM has traditionally been used as a hardware search engine that explores the entire memory in a single clock cycle. However, there has been little attention to the security of the data stored in CAM. Our approach stores distance-preserving hash embeddings within CAM to ensure data privacy. The hashing method provides data confidentiality while preserving similarity in the sense that a high resemblance in the data domain is translated to a small Hamming distance in the hash domain. Consequently, the objective of near-neighbor search is converted to approximate lookup table search which is compatible with the realizations of emerging content addressable memories. 
Our methodology delivers on average two orders of magnitude faster response time compared to RAM-based solutions that preserve the privacy of data owners.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "136", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Piccolboni:2017:ECF, author = "Luca Piccolboni and Alessandro Menon and Graziano Pravadelli", title = "Efficient Control-Flow Subgraph Matching for Detecting Hardware {Trojans} in {RTL} Models", journal = j-TECS, volume = "16", number = "5s", pages = "137:1--137:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126552", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Only a few solutions for Hardware Trojan (HT) detection work at Register-Transfer Level (RTL), thus delaying the identification of possible security issues at lower abstraction levels of the design process. In addition, most of the existing approaches work only for specific kinds of HTs. To overcome these limitations, we present a verification approach that detects different types of HTs in RTL models by exploiting an efficient control-flow subgraph matching algorithm. The prototypes of HTs that can be detected are modelled in a library by using Control-Flow Graphs (CFGs) that can be parametrised and extended to cover several variants of Trojan patterns. Experimental results show that our approach is effective and efficient in comparison with other state-of-the-art solutions.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "137", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Migliore:2017:HSA, author = "Vincent Migliore and C{\'e}dric Seguin and Maria M{\'e}ndez Real and Vianney Lapotre and Arnaud Tisserand and Caroline Fontaine and Guy Gogniat and Russell Tessier", title = "A High-Speed Accelerator for Homomorphic Encryption using the {Karatsuba} Algorithm", journal = j-TECS, volume = "16", number = "5s", pages = "138:1--138:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126558", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Somewhat Homomorphic Encryption (SHE) schemes can be used to carry out operations on ciphered data. In a cloud computing scenario, personal information can be processed secretly, inferring a high level of confidentiality. The principal limitation of SHE is the size of ciphertext compared to the size of the message. This issue can be addressed by using a batching technique that ``packs'' several messages into one ciphertext. However, this method leads to important drawbacks in standard implementations. This paper presents a fast hardware/software co-design implementation of an encryption procedure using the Karatsuba algorithm. Our hardware accelerator is 1.5 times faster than the state of the art for 1 encryption and 4 times faster for 4 encryptions.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "138", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhang:2017:FAK, author = "Jiacheng Zhang and Youyou Lu and Jiwu Shu and Xiongjun Qin", title = "{FlashKV}: Accelerating {KV} Performance with Open-Channel {SSDs}", journal = j-TECS, volume = "16", number = "5s", pages = "139:1--139:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126545", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "As the cost-per-bit of solid state disks is decreasing quickly, SSDs are supplanting HDDs in many cases, including the primary storage of key-value stores. However, simply deploying LSM-tree-based key-value stores on commercial SSDs is inefficient and induces heavy write amplification and severe garbage collection overhead under write-intensive conditions. The main cause of these critical issues comes from the triple redundant management functionalities lying in the LSM-tree, file system and flash translation layer, which block the awareness between key-value stores and flash devices. Furthermore, we observe that the performance of LSM-tree-based key-value stores is improved little by only eliminating these redundant layers, as the I/O stacks, including the cache and scheduler, are not optimized for LSM-tree's unique I/O patterns. To address the issues above, we propose FlashKV, an LSM-tree based key-value store running on open-channel SSDs. FlashKV eliminates the redundant management and semantic isolation by directly managing the raw flash devices in the application layer. With the domain knowledge of LSM-tree and the open-channel information, FlashKV employs a parallel data layout to exploit the internal parallelism of the flash device, and optimizes the compaction, caching and I/O scheduling mechanisms specifically. 
Evaluations show that FlashKV effectively improves system performance by $ 1.5 \times $ to $ 4.5 \times $ and decreases up to 50\% write traffic under heavy write conditions, compared to LevelDB.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "139", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kim:2017:PBB, author = "Hong Seok Kim and Eyee Hyun Nam and Ji Hyuck Yun and Sheayun Lee and Sang Lyul Min", title = "{P-BMS}: a Bad Block Management Scheme in Parallelized Flash Memory Storage Devices", journal = j-TECS, volume = "16", number = "5s", pages = "140:1--140:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126550", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Flash memory is used as a main data storage medium in increasingly large areas of applications, rapidly replacing hard disk drives because of its low power consumption, fast random access, and high shock resistance. Such flash-based storage devices generally incorporate multiple flash memory chips to meet the ever growing capacity demands. Using multiple chips in a single storage device, at the same time, opens an opportunity to boost the performance based on multi-unit parallelism. However, parallel execution of multiple flash operations introduces complications when bad blocks occur, which is unavoidable due to flash memory's physical characteristics. The situation gets even worse when bad block occurrences are accompanied by sudden power failures. We propose a bad block management scheme called P-BMS that can fully utilize flash-level parallelism, while guaranteeing provably correct block replacement. 
Experiments show that our P-BMS achieves a throughput that is more than 95\% of the maximum bandwidth of the flash controller, even with bad block occurrences far heavier than in real flash memory.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "140", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wu:2017:PIE, author = "Fei Wu and Meng Zhang and Yajuan Du and Xubin He and Ping Huang and Changsheng Xie and Jiguang Wan", title = "A Program Interference Error Aware {LDPC} Scheme for Improving {NAND} Flash Decoding Performance", journal = j-TECS, volume = "16", number = "5s", pages = "141:1--141:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126563", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "By scaling down to smaller cell size, NAND flash has significantly increased the storage capacity in order to lower the unit cost down. However, the reliability is sacrificed due to much higher raw bit error rates. As a result, conventional error correction codes (ECCs), such as BCH codes, are not sufficient. Low-density parity check (LDPC) codes with stronger error correction capability are adopted in NAND flash to guarantee data reliability. However, read performance using LDPC is poor because of its decoding complexity. It has been found that flash cells with fewer electrons are more prone to program interference errors. As a result, program interference errors show the characteristic of value dependence. This characteristic can be exploited and translated into extra information facilitating the decoding convergence. Motivated by this observation, we propose PEAL: a flash program interference error aware LDPC scheme to enhance the decoding performance. 
PEAL integrates the obtained extra information from the value dependence into the soft-to-hard decision process in LDPC decoding to decrease decoding iterations and improve the decoding convergence speed. Simulation results show that decoding iterations are reduced by up to 69.37\% and the decoding convergence speed is improved by up to $ 2.5 \times $, compared with the normalized min-sum (NMS) algorithm with 2KB information lengths at an approximate raw bit error rate of $ 11.5 \times 10^{-3} $.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "141", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wang:2017:PAP, author = "Yi Wang and Lisha Dong and Rui Mao", title = "{P-Alloc}: Process-Variation Tolerant Reliability Management for {$3$D} Charge-Trapping Flash Memory", journal = j-TECS, volume = "16", number = "5s", pages = "142:1--142:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126554", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Three-dimensional (3D) flash memory is an emerging memory technology that enables a number of improvements to conventional planar NAND flash memory, including larger capacity, less program disturbance, and lower access latency. In contrast to conventional planar flash memory, 3D flash memory adopts charge-trapping mechanism. NAND strings punch through multiple stacked layers to form the three-dimensional infrastructure. However, the etching processes for NAND strings are unable to produce perfectly vertical features, especially on the scale of 20 nanometers or less. The process variation will cause uneven distribution of electrons, which poses a threat to the integrity of data stored in flash. 
This paper presents P-Alloc, a process-variation tolerant reliability management strategy for 3D charge-trapping flash memory. P-Alloc offers both hardware and software support to allocate data to the 3D flash in the presence of process variation. P-Alloc predicts the state of a physical page, i.e., the basic unit for each write or read operation in flash memory, and tries to assign critical data to more reliable pages. A hardware-based voltage threshold compensation scheme is also proposed to further reduce the faults. We demonstrate the viability of the proposed scheme using a variety of realistic workloads. Our extensive evaluations show that P-Alloc significantly enhances the reliability and reduces the access latency compared to the baseline scheme.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "142", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Tan:2017:ASA, author = "Benjamin Tan and Morteza Biglari-Abhari and Zoran Salcic", title = "An Automated Security-Aware Approach for Design of Embedded Systems on {MPSoC}", journal = j-TECS, volume = "16", number = "5s", pages = "143:1--143:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126553", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "MPSoC-based embedded systems design is becoming increasingly complex. Not only do we need to satisfy multiple design objectives, we increasingly need to address potential security risks. In this work, we propose a security-aware systematic design approach which explores the design space, given a system-level application description, by generating potential architecture configurations of execution platform nodes that are interconnected using a NoC.
We then perform automated security analysis to check the generated configurations against designer-specified security constraints. Following the analysis, we use an automated architecture configuration refinement process to generate a list of security additions that are inserted into the initial configuration so that the security constraints are satisfied. By performing this refinement on several candidate configuration options, we can explore the trade-off between resource cost and security. In this paper, we illustrate the proposed approach using a Smart Home Control System application.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "143", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Tsoutsouras:2017:SSO, author = "Vasileios Tsoutsouras and Dimosthenis Masouros and Sotirios Xydis and Dimitrios Soudris", title = "{SoftRM}: Self-Organized Fault-Tolerant Resource Management for Failure Detection and Recovery in {NoC} Based Many-Cores", journal = j-TECS, volume = "16", number = "5s", pages = "144:1--144:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126562", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Many-core systems are envisioned to leverage the ever-increasing demand for more powerful computing systems. To provide the necessary computing power, the number of Processing Elements integrated on-chip increases and NoC based infrastructures are adopted to address the interconnection scalability. The advent of these new architectures surfaces the need for more sophisticated, distributed resource management paradigms, which in addition to the extreme integration scaling, make the new systems more prone to errors manifested both at hardware and software. 
In this work, we highlight the need for Run-Time Resource management to be enhanced with fault tolerance features and propose SoftRM, a resource management framework which can dynamically adapt to permanent failures in a self-organized, workload-aware manner. Self-organization allows the resource management agents to recover from a failure in a coordinated way by electing a new agent to replace the failed one, while workload awareness optimizes this choice according to the status of each core. We evaluate the proposed framework on Intel Single-chip Cloud Computer (SCC), a NoC based many-core system and customize it to achieve minimum interference on the resource allocation process. We showcase that its workload-aware features manage to utilize free resources in more than 90\% of the conducted experiments. Comparison with relevant state-of-the-art fault tolerant frameworks shows decrease of up to 67\% in the imposed overhead on application execution.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "144", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Bhat:2017:PTS, author = "Ganapati Bhat and Suat Gumussoy and Umit Y. Ogras", title = "Power-Temperature Stability and Safety Analysis for Multiprocessor Systems", journal = j-TECS, volume = "16", number = "5s", pages = "145:1--145:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126567", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Modern multiprocessor system-on-chips (SoCs) integrate multiple heterogeneous cores to achieve high energy efficiency. The power consumption of each core contributes to an increase in the temperature across the chip floorplan.
In turn, higher temperature increases the leakage power exponentially, and leads to a positive feedback with nonlinear dynamics. This paper presents a power-temperature stability and safety analysis technique for multiprocessor systems. This analysis reveals the conditions under which the power-temperature trajectory converges to a stable fixed point. We also present a simple formula to compute the stable fixed point and maximum thermally-safe power consumption at runtime. Hardware measurements on a state-of-the-art mobile processor show that our analytical formulation can predict the stable fixed point with an average error of 2.6\%. Hence, our approach can be used at runtime to ensure thermally safe operation and guard against thermal threats.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "145", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wang:2017:CEG, author = "Siqi Wang and Guanwen Zhong and Tulika Mitra", title = "{CGPredict}: Embedded {GPU} Performance Estimation from Single-Threaded Applications", journal = j-TECS, volume = "16", number = "5s", pages = "146:1--146:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126546", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Heterogeneous multiprocessor system-on-chip architectures are endowed with accelerators such as embedded GPUs and FPGAs capable of general-purpose computation. The application developers for such platforms need to carefully choose the accelerator with the maximum performance benefit. For a given application, usually, the reference code is specified in a high-level single-threaded programming language such as C. 
The performance of an application kernel on an accelerator is a complex interplay among the exposed parallelism, the compiler, and the accelerator architecture. Thus, determining the performance of a kernel requires its redevelopment into each accelerator-specific language, causing substantial wastage of time and effort. To aid the developer in this early design decision, we present an analytical framework CGPredict to predict the performance of a computational kernel on an embedded GPU architecture from un-optimized, single-threaded C code. The analytical approach provides insights on application characteristics which suggest further application-specific optimizations. The estimation error is as low as 2.66\% (average 9\%) compared to the performance of the same kernel written in native CUDA code running on NVIDIA Kepler embedded GPU. This low performance estimation error enables CGPredict to provide an early design recommendation of the accelerator starting from C code.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "146", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Singh:2017:EER, author = "Amit Kumar Singh and Alok Prakash and Karunakar Reddy Basireddy and Geoff V. Merrett and Bashir M. 
Al-Hashimi", title = "Energy-Efficient Run-Time Mapping and Thread Partitioning of Concurrent {OpenCL} Applications on {CPU--GPU MPSoCs}", journal = j-TECS, volume = "16", number = "5s", pages = "147:1--147:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126548", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Heterogeneous Multi-Processor Systems-on-Chips (MPSoCs) containing CPU and GPU cores are typically required to execute applications concurrently. However, as will be shown in this paper, existing approaches are not well suited for concurrent applications as they are developed either by considering only a single application or they do not exploit both CPU and GPU cores at the same time. In this paper, we propose an energy-efficient run-time mapping and thread partitioning approach for executing concurrent OpenCL applications on both CPU and GPU cores while satisfying performance requirements. Depending upon the performance requirements, for each concurrently executing application, the mapping process finds the appropriate number of CPU cores and operating frequencies of CPU and GPU cores, and the partitioning process identifies an efficient partitioning of the applications' threads between CPU and GPU cores. We validate the proposed approach experimentally on the Odroid-XU3 hardware platform with various mixes of applications from the Polybench benchmark suite. Additionally, a case-study is performed with a real-world application SLAMBench. Results show an average energy saving of 32\% compared to existing approaches while still satisfying the performance requirements.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "147", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Oneal:2017:GPE, author = "Kenneth O'Neal and Philip Brisk and Ahmed Abousamra and Zack Waters and Emily Shriver", title = "{GPU} Performance Estimation using Software Rasterization and Machine Learning", journal = j-TECS, volume = "16", number = "5s", pages = "148:1--148:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126557", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This paper introduces a predictive modeling framework to estimate the performance of GPUs during pre-silicon design. Early-stage performance prediction is useful when simulation times impede development by rendering driver performance validation, API conformance testing and design space explorations infeasible. Our approach builds a Random Forest regression model to analyze DirectX 3D workload behavior when executed by a software rasterizer, which we have extended with a workload characterizer to collect further performance information via program counters. In addition to regression models, this work produces detailed feature rankings which can provide valuable architectural insight, and accurate performance estimates for an Intel integrated Skylake generation GPU. Our models achieve reasonable out-of-sample-error rates of 14\%, with an average simulation speedup of 327x.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "148", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Fezzardi:2017:UEP, author = "Pietro Fezzardi and Marco Lattuada and Fabrizio Ferrandi", title = "Using Efficient Path Profiling to Optimize Memory Consumption of On-Chip Debugging for High-Level Synthesis", journal = j-TECS, volume = "16", number = "5s", pages = "149:1--149:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126564", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "High-Level Synthesis (HLS) for FPGAs is attracting popularity and is increasingly used to handle complex systems with multiple integrated components. To increase performance and efficiency, HLS flows now adopt several advanced optimization techniques. Aggressive optimizations and system level integration can cause the introduction of bugs that are only observable on-chip. Debugging support for circuits generated with HLS is receiving considerable attention. Among the data that can be collected on chip for debugging, one of the most important is the state of the Finite State Machines (FSM) controlling the components of the circuit. However, this usually requires a large amount of memory to trace the behavior during the execution. This work proposes an approach that takes advantage of the HLS information and of the structure of the FSM to compress control flow traces and to integrate optimized components for on-chip debugging. The generated checkers analyze the FSM execution on-the-fly, automatically notifying when a bug is detected, localizing it and providing data about its cause. The traces are compressed using a software profiling technique, called Efficient Path Profiling (EPP), adapted for the debugging of hardware accelerators generated with HLS.
With this technique, the size of the memory used to store control flow traces can be reduced up to 2 orders of magnitude, compared to state-of-the-art.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "149", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Piccolboni:2017:CCH, author = "Luca Piccolboni and Paolo Mantovani and Giuseppe {Di Guglielmo} and Luca P. Carloni", title = "{COSMOS}: Coordination of High-Level Synthesis and Memory Optimization for Hardware Accelerators", journal = j-TECS, volume = "16", number = "5s", pages = "150:1--150:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126566", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Hardware accelerators are key to the efficiency and performance of system-on-chip (SoC) architectures. With high-level synthesis (HLS), designers can easily obtain several performance-cost trade-off implementations for each component of a complex hardware accelerator. However, navigating this design space in search of the Pareto-optimal implementations at the system level is a hard optimization task. We present COSMOS, an automatic methodology for the design-space exploration (DSE) of complex accelerators, that coordinates both HLS and memory optimization tools in a compositional way. First, thanks to the co-design of datapath and memory, COSMOS produces a large set of Pareto-optimal implementations for each component of the accelerator. Then, COSMOS leverages compositional design techniques to quickly converge to the desired trade-off point between cost and performance at the system level. 
When applied to the system-level design (SLD) of an accelerator for wide-area motion imagery (WAMI), COSMOS explores the design space as completely as an exhaustive search, but it reduces the number of invocations to the HLS tool by up to $ 14.6 \times $.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "150", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Motamedi:2017:MIR, author = "Mohammad Motamedi and Daniel Fong and Soheil Ghiasi", title = "Machine Intelligence on Resource-Constrained {IoT} Devices: The Case of Thread Granularity Optimization for {CNN} Inference", journal = j-TECS, volume = "16", number = "5s", pages = "151:1--151:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126555", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Despite their remarkable performance in various machine intelligence tasks, the computational intensity of Convolutional Neural Networks (CNNs) has hindered their widespread utilization in resource-constrained embedded and IoT systems. To address this problem, we present a framework for synthesis of efficient CNN inference software targeting mobile SoC platforms. We argue that thread granularity can substantially impact the performance and energy dissipation of the synthesized inference software, and demonstrate that launching the maximum number of logical threads, often promoted as a guiding principle by GPGPU practitioners, does not result in an efficient implementation for mobile SoCs. 
We hypothesize that the runtime of a CNN layer on a particular SoC platform can be accurately estimated as a linear function of its computational complexity, which may seem counter-intuitive, as modern mobile SoCs utilize a plethora of heterogeneous architectural features and dynamic resource management policies. Consequently, we develop a principled approach and a data-driven analytical model to optimize granularity of threads during CNN software synthesis. Experimental results with several modern CNNs mapped to a commodity Android smartphone with a Snapdragon SoC show up to 2.37X speedup in application runtime, and up to 1.9X improvement in its energy dissipation compared to existing approaches.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "151", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Vougioukas:2017:NFS, author = "Ilias Vougioukas and Andreas Sandberg and Stephan Diestelhorst and Bashir M. Al-Hashimi and Geoff V. Merrett", title = "Nucleus: Finding the Sharing Limit of Heterogeneous Cores", journal = j-TECS, volume = "16", number = "5s", pages = "152:1--152:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126544", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Heterogeneous multi-processors are designed to bridge the gap between performance and energy efficiency in modern embedded systems. This is achieved by pairing Out-of-Order (OoO) cores, yielding performance through aggressive speculation and latency masking, with In-Order (InO) cores, that preserve energy through simpler design. By leveraging migrations between them, workloads can therefore select the best setting for any given energy/delay envelope. 
However, migrations introduce execution overheads that can hurt performance if they happen too frequently. Finding the optimal migration frequency is critical to maximize energy savings while maintaining acceptable performance. We develop a simulation methodology that can (1) isolate the hardware effects of migrations from the software, (2) directly compare the performance of different core types, (3) quantify the performance degradation and (4) calculate the cost of migrations for each case. To showcase our methodology we run mibench, a microbenchmark suite, and show that migrations can happen as fast as every 100k instructions with little performance loss. We also show that, contrary to numerous recent studies, hypothetical designs do not need to share all of their internal components to be able to migrate at that frequency. Instead, we propose a feasible system that shares level 2 caches and a translation lookaside buffer that matches performance and efficiency. Our results show that there are phases comprising up to 10\% that a migration to the OoO core leads to performance benefits without any additional energy cost when running on the InO core, and up to 6\% of phases where a migration to the InO core can save energy without affecting performance. When considering a policy that focuses on improving the energy-delay product, results show that on average 66\% of the phases can be migrated to deliver equal or better system operation without having to aggressively share the entire memory system or to revert to migration periods finer than 100k instructions.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "152", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Papagiannopoulou:2017:ETE, author = "Dimitra Papagiannopoulou and Andrea Marongiu and Tali Moreshet and Maurice Herlihy and R. 
Iris Bahar", title = "{Edge-TM}: Exploiting Transactional Memory for Error Tolerance and Energy Efficiency", journal = j-TECS, volume = "16", number = "5s", pages = "153:1--153:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126556", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Scaling of semiconductor devices has enabled higher levels of integration and performance improvements at the price of making devices more susceptible to the effects of static and dynamic variability. Adding safety margins (guardbands) on the operating frequency or supply voltage prevents timing errors, but has a negative impact on performance and energy consumption. We propose Edge-TM, an adaptive hardware/software error management policy that (i) optimistically scales the voltage beyond the edge of safe operation for better energy savings and (ii) works in combination with a Hardware Transactional Memory (HTM)-based error recovery mechanism. The policy applies dynamic voltage scaling (DVS) (while keeping frequency fixed) based on the feedback provided by HTM, which makes it simple and generally applicable. Experiments on an embedded platform show our technique capable of 57\% energy improvement compared to using voltage guardbands and an extra 21--24\% improvement over existing state-of-the-art error tolerance solutions, at a nominal area and time overhead.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "153", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Vogel:2017:EVM, author = "Pirmin Vogel and Andreas Kurth and Johannes Weinbuch and Andrea Marongiu and Luca Benini", title = "Efficient Virtual Memory Sharing via On-Accelerator Page Table Walking in Heterogeneous Embedded {SoCs}", journal = j-TECS, volume = "16", number = "5s", pages = "154:1--154:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126560", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Shared virtual memory is key in heterogeneous systems on chip (SoCs) that combine a general-purpose host processor with a many-core accelerator, both for programmability and performance. In contrast to the full-blown, hardware-only solutions predominant in modern high-end systems, lightweight hardware-software co-designs are better suited in the context of more power- and area-constrained embedded systems and provide additional benefits in terms of flexibility and predictability. As a downside, the latter solutions require the host to handle in software synchronization in case of page misses as well as miss handling. This may incur considerable run-time overheads. In this work, we present a novel hardware-software virtual memory management approach for many-core accelerators in heterogeneous embedded SoCs. It exploits an accelerator-side helper thread concept that enables the accelerator to manage its virtual memory hardware autonomously while operating cache-coherently on the page tables of the user-space processes of the host. This greatly reduces overhead with respect to host-side solutions while retaining flexibility. We have validated the design with a set of parameterizable benchmarks and real-world applications covering various application domains. 
For purely memory-bound kernels, the accelerator performance improves by a factor of 3.8 compared with host-based management and lies within 50\% of a lower-bound ideal memory management unit.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "154", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Khouzani:2017:DBS, author = "Hoda Aghaei Khouzani and Chengmo Yang", title = "A {DWM}-Based Stack Architecture Implementation for Energy Harvesting Systems", journal = j-TECS, volume = "16", number = "5s", pages = "155:1--155:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126543", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Energy harvesting systems tend to use non-volatile processors to conduct computation under intermittent power supplies. While previous implementations of non-volatile processors are based on register architectures, stack architecture, known for its simplicity and small footprint, seems to be a better fit for energy harvesting systems. In this work, Domain Wall Memory (DWM) is used to implement ZPU, the world's smallest working CPU. Not only does DWM offer ultra-high density and SRAM-comparable access latency, but the sequential access structure of DWM also makes it well suited for a stack whose accesses display high temporal locality. As the performance and energy of DWM are determined by the number of shift operations performed to access the stack, this paper further reduces shift operations through novel data placement and micro-code transformation optimizations. The impact of compiler optimization techniques on the number of shift operations is also investigated so as to select the most effective optimizations for DWM-based stack machine. 
Experimental studies confirm the effectiveness of the proposed DWM-based stack architectures in improving the performance and energy-efficiency of energy harvesting systems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "155", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Park:2017:FPC, author = "Jaehyun Park and Hitesh Joshi and Hyung Gyu Lee and Sayfe Kiaei and Umit Y. Ogras", title = "Flexible {PV}-cell Modeling for Energy Harvesting in Wearable {IoT} Applications", journal = j-TECS, volume = "16", number = "5s", pages = "156:1--156:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126568", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Wearable devices with sensing, processing and communication capabilities have become feasible with the advances in internet-of-things (IoT) and low power design technologies. Energy harvesting is extremely important for wearable IoT devices due to size and weight limitations of batteries. One of the most widely used energy harvesting sources is photovoltaic cell (PV-cell) owing to its simplicity and high output power. In particular, flexible PV-cells offer great potential for wearable applications. This paper models, for the first time, how bending a PV-cell significantly impacts the harvested energy. Furthermore, we derive an analytical model to quantify the harvested energy as a function of the radius of curvature. We validate the proposed model empirically using a commercial PV-cell under a wide range of bending scenarios, light intensities and elevation angles. 
Finally, we show that the proposed model can accelerate maximum power point tracking algorithms and increase the harvested energy by up to 25.0\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "156", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Andalam:2017:NEM, author = "Sidharta Andalam and Nathan Allen and Avinash Malik and Partha S. Roop and Mark Trew", title = "A Novel Emulation Model of the Cardiac Conduction System", journal = j-TECS, volume = "16", number = "5s", pages = "157:1--157:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126542", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Models of the cardiac conduction system are usually at two extremes: (1) high fidelity models with excellent precision but lacking a real-time response for emulation (hardware in the loop simulation); or (2) models amenable for emulation, but that do not exhibit appropriate dynamic response, which is necessary for arrhythmia susceptibility. We introduce two abstractions to remedy the situation. The first abstraction is a new cell model, which is a semi-linear hybrid automata. The proposed model is as computationally efficient as current state-of-the-art cell models amenable for emulation. Yet, unlike these models, it is also able to capture the dynamic response of the cardiac cell like the higher-fidelity models. The second abstraction is the use of smooth-tokens to develop a new path model, connecting cells, which is efficient in terms of memory consumption. Moreover, the memory requirements of the path model can be statically bounded and are invariant to the emulation step size. Results show that the proposed semi-linear abstraction for the cell reduces the execution time by up to 44\%. 
Furthermore, the smooth-tokens based path model reduces the memory consumption by 40 times when compared to existing path models. This paves the way for the emulation of complex cardiac conduction systems, using hardware code-generators.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "157", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Rouhani:2017:RAF, author = "Bita Darvish Rouhani and Azalia Mirhoseini and Farinaz Koushanfar", title = "{RISE}: an Automated Framework for Real-Time Intelligent Video Surveillance on {FPGA}", journal = j-TECS, volume = "16", number = "5s", pages = "158:1--158:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126549", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This paper proposes RISE, an automated Reconfigurable framework for real-time background subtraction applied to Intelligent video SurveillancE. RISE is devised with a new streaming-based methodology that adaptively learns/updates a corresponding dictionary matrix from background pixels as new video frames are captured over time. This dictionary is used to highlight the foreground information in each video frame. A key characteristic of RISE is that it adaptively adjusts its dictionary for diverse lighting conditions and varying camera distances by continuously updating the corresponding dictionary. We evaluate RISE on natural-scene vehicle images of different backgrounds and ambient illuminations. To facilitate automation, we provide an accompanying API that can be used to deploy RISE on FPGA-based system-on-chip platforms. 
We prototype RISE for end-to-end deployment of three widely-adopted image processing tasks used in intelligent transportation systems: License Plate Recognition (LPR), image denoising/reconstruction, and principal component analysis. Our evaluations demonstrate up to 87-fold higher throughput per energy unit compared to the prior-art software solution executed on ARM Cortex-A15 embedded platform.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "158", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Basu:2017:IUL, author = "Soumya Basu and Loris Duch and Rub{\'e}n Braojos and Giovanni Ansaloni and Laura Pozzi and David Atienza", title = "An Inexact Ultra-low Power Bio-signal Processing Architecture With Lightweight Error Recovery", journal = j-TECS, volume = "16", number = "5s", pages = "159:1--159:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126565", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The energy efficiency of digital architectures is tightly linked to the voltage level (Vdd) at which they operate. Aggressive voltage scaling is therefore mandatory when ultra-low power processing is required. Nonetheless, the lowest admissible Vdd is often bounded by reliability concerns, especially since static and dynamic non-idealities are exacerbated in the near-threshold region, imposing costly guard-bands to guarantee correctness under worst-case conditions. A striking alternative, explored in this paper, waives the requirement for unconditional correctness, undergoing more relaxed constraints. First, after a run-time failure, processing correctly resumes at a later point in time. Second, failures induce a limited Quality-of-Service (QoS) degradation. 
We focus our investigation on the practical scenario of embedded bio-signal analysis, a domain in which energy efficiency is key, while applications are inherently error-tolerant to a certain degree. Targeting a domain-specific multi-core platform, we present a study of the impact of inexactness on application-visible errors. Then, we introduce a novel methodology to manage them, which requires minimal hardware resources and a negligible energy overhead. Experimental evidence show that, by tolerating 900 errors/hour, the resulting inexact platform can achieve an efficiency increase of up to 24\%, with a QoS degradation of less than 3\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "159", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{VanPinxten:2017:OSR, author = "Joost {Van Pinxten} and Umar Waqas and Marc Geilen and Twan Basten and Lou Somers", title = "Online Scheduling of $2$-Re-entrant Flexible Manufacturing Systems", journal = j-TECS, volume = "16", number = "5s", pages = "160:1--160:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126551", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Online scheduling of operations is essential to optimize productivity of flexible manufacturing systems (FMSs) where manufacturing requests arrive on the fly. An FMS processes products according to a particular flow through processing stations. This work focusses on online scheduling of re-entrant FMSs with flows using processing stations where products pass twice and with limited buffering between processing stations. This kind of FMS is modelled as a re-entrant flow shop with due dates and sequence-dependent set-up times. 
Such flow shops can benefit from minimization of the time penalties incurred from set-up times. On top of an existing greedy scheduling heuristic we apply a meta-heuristic that simultaneously explores several alternatives considering trade-offs between the used metrics by the scheduling heuristic. We identify invariants to efficiently remove many infeasible scheduling options so that the running time of online implementations is improved. The resulting algorithm is much faster than the state of the art and produces schedules with on average 4.6\% shorter makespan.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "160", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Beckert:2017:RTA, author = "Matthias Beckert and Rolf Ernst", title = "Response Time Analysis for Sporadic Server Based Budget Scheduling in Real Time Virtualization Environments", journal = j-TECS, volume = "16", number = "5s", pages = "161:1--161:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126559", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Virtualization techniques for embedded real-time systems typically employ TDMA scheduling to achieve temporal isolation among different virtualized applications. Recent work already introduced sporadic server based solutions relying on budgets instead of a fixed TDMA schedule. While providing better average-case response times for IRQs and tasks, a formal response time analysis for the worst-case is still missing. In order to confirm the advantage of a sporadic server based budget scheduling, this paper provides a worst-case response time analysis. 
To improve the sporadic server based budget scheduling even more, we provide a background scheduling implementation which will also be covered by the formal analysis. We show correctness of the analysis approach and compare it against TDMA based systems. In addition to that, we provide response time measurements from a working hypervisor implementation on an ARM based development board.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "161", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chen:2017:RTD, author = "Xiaowen Chen and Zhonghai Lu and Sheng Liu and Shuming Chen", title = "Round-trip {DRAM} Access Fairness in {$3$D} {NoC-based} Many-core Systems", journal = j-TECS, volume = "16", number = "5s", pages = "162:1--162:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126561", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In 3D NoC-based many-core systems, DRAM accesses behave differently due to their different communication distances and the latency gap of different DRAM accesses becomes bigger as the network size increases, which leads to unfair DRAM access performance among different nodes. This phenomenon may lead to high latencies for some DRAM accesses that become the performance bottleneck of the system. The paper addresses the DRAM access fairness problem in 3D NoC-based many-core systems by narrowing the latency difference of DRAM accesses as well as reducing the maximum latency. Firstly, the latency of a round-trip DRAM access is modeled and the factors causing DRAM access latency difference are discussed in detail. Secondly, the DRAM access fairness is further quantitatively analyzed through experiments. 
Thirdly, we propose to predict the network latency of round-trip DRAM accesses and use the predicted round-trip DRAM access time as the basis to prioritize the DRAM accesses in DRAM interfaces so that the DRAM accesses with potential high latencies can be transferred as early and fast as possible, thus achieving fair DRAM access. Experiments with synthetic and application workloads validate that our approach can achieve fair DRAM access and outperform the traditional First-Come-First-Serve (FCFS) scheduling policy and the scheduling policies proposed by reference [7] and [24] in terms of maximum latency, Latency Standard Deviation (LSD) and speedup. In the experiments, the maximum improvement of the maximum latency, LSD, and speedup are 12.8\%, 6.57\%, and 8.3\% respectively. Besides, our proposal brings very small extra hardware overhead ($<$ 0.6\%) in comparison to the three counterparts.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "162", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lee:2017:MAA, author = "Jaewoo Lee and Hoon Sung Chwa and Linh T. X. Phan and Insik Shin and Insup Lee", title = "{MC-ADAPT}: Adaptive Task Dropping in Mixed-Criticality Scheduling", journal = j-TECS, volume = "16", number = "5s", pages = "163:1--163:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126498", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Recent embedded systems are becoming integrated systems with components of different criticality. To tackle this, mixed-criticality systems aim to provide different levels of timing assurance to components of different criticality levels while achieving efficient resource utilization.
Many approaches have been proposed to execute more lower-criticality tasks without affecting the timeliness of higher-criticality tasks. Those previous approaches however have at least one of the two limitations; (i) they penalize all lower-criticality tasks at once upon a certain situation, or (ii) they make the decision how to penalize lower-criticality tasks at design time. As a consequence, they under-utilize resources by imposing an excessive penalty on low-criticality tasks. Unlike those existing studies, we present a novel framework, called MC-ADAPT, that aims to minimally penalize lower-criticality tasks by fully reflecting the dynamically changing system behavior into adaptive decision making. Towards this, we propose a new scheduling algorithm and develop its runtime schedulability analysis capable of capturing the dynamic system state. Our proposed algorithm adaptively determines which task to drop based on the runtime analysis. To determine the quality of task dropping solution, we propose the speedup factor for task dropping while the conventional use of the speedup factor only evaluates MC scheduling algorithms in terms of the worst-case schedulability. We apply the speedup factor for a newly-defined task dropping problem that evaluates task dropping solution under different runtime scheduling scenarios. We derive that MC-ADAPT has a speedup factor of 1.619 for task drop. This implies that MC-ADAPT can behave the same as the optimal scheduling algorithm with optimal task dropping strategy does under any runtime scenario if the system is sped up by a factor of 1.619.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "163", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Rouxel:2017:TCD, author = "Benjamin Rouxel and Steven Derrien and Isabelle Puaut", title = "Tightening Contention Delays While Scheduling Parallel Applications on Multi-core Architectures", journal = j-TECS, volume = "16", number = "5s", pages = "164:1--164:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126496", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Multi-core systems are increasingly interesting candidates for executing parallel real-time applications, in avionic, space or automotive industries, as they provide both computing capabilities and power efficiency. However, ensuring that timing constraints are met on such platforms is challenging, because some hardware resources are shared between cores. Assuming worst-case contentions when analyzing the schedulability of applications may result in systems mistakenly declared unschedulable, although the worst-case level of contentions can never occur in practice. In this paper, we present two contention-aware scheduling strategies that produce a time-triggered schedule of the application's tasks. Based on knowledge of the application's structure, our scheduling strategies precisely estimate the effective contentions, in order to minimize the overall makespan of the schedule. An Integer Linear Programming (ILP) solution of the scheduling problem is presented, as well as a heuristic solution that generates schedules very close to ones of the ILP (5\% longer on average), with a much lower time complexity. Our heuristic improves by 19\% the overall makespan of the resulting schedules compared to a worst-case contention baseline.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "164", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ahmed:2017:DAT, author = "Rehan Ahmed and Pengcheng Huang and Max Millen and Lothar Thiele", title = "On The Design and Application of Thermal Isolation Servers", journal = j-TECS, volume = "16", number = "5s", pages = "165:1--165:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126512", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Recently, there has been an increasing trend towards executing real-time applications on multi-core platforms. However, this complicates the design problem, as applications running on different cores can interfere due to shared resources and mediums. In this paper, we focus on thermal interference, where a given task ($ \tau_1 $) heats the processor, resulting in reduced service (due to Dynamic Thermal Management (DTM)) to another task ($ \tau_2 $). In real-time domain, where tasks have deadline constraints, thermal interference is a substantial problem as it directly impacts the Worst Case Execution Time (WCET) of the affected application ($ \tau_2 $). The problem exacerbates as we move to mixed-criticality systems, where the criticality of $ \tau_2 $ may be greater than the criticality of $ \tau_1 $, complicating the certification process. In this paper, we propose a server based strategy (Thermal Isolation Server (TI Server)) which can be used to avoid thermal interference of applications. We also present a heuristic to design TI Servers to meet the timing constraints of all tasks and the thermal constraints of the system. TI Servers are time/space composable, and can be applied to a variety of task models.
We also evaluate TI Servers on a hardware test-bed for validation purposes.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "165", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Allamigeon:2017:FMC, author = "Xavier Allamigeon and St{\'e}phane Gaubert and Eric Goubault and Sylvie Putot and Nikolas Stott", title = "A Fast Method to Compute Disjunctive Quadratic Invariants of Numerical Programs", journal = j-TECS, volume = "16", number = "5s", pages = "166:1--166:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126502", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We introduce a new method to compute non-convex invariants of numerical programs, which includes the class of switched affine systems with affine guards. We obtain disjunctive and non-convex invariants by associating different partial execution traces with different ellipsoids. A key ingredient is the solution of non-monotone fixed points problems over the space of ellipsoids with a reduction to small size linear matrix inequalities. This allows us to analyze instances that are inaccessible in terms of expressivity or scale by earlier methods based on semi-definite programming.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "166", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Schulze:2017:IIM, author = "Christoph Schulze and Rance Cleaveland", title = "Improving Invariant Mining via Static Analysis", journal = j-TECS, volume = "16", number = "5s", pages = "167:1--167:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126504", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This paper proposes the use of static analysis to improve the generation of invariants from test data extracted from Simulink models. Previous work has shown the utility of such automatically generated invariants as a means for updating and completing system specifications; they also are useful as a means of understanding model behavior. This work shows how the scalability and accuracy of the data mining process can be dramatically improved by using information from data/control flow analysis to reduce the search space of the invariant mining and to eliminate false positives. Comparative evaluations of the process show that the improvements significantly reduce execution time and memory consumption, thereby supporting the analysis of more complex models, while also improving the accuracy of the generated invariants.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "167", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chaki:2017:FVT, author = "Sagar Chaki and Dionisio {De Niz}", title = "Formal Verification of a Timing Enforcer Implementation", journal = j-TECS, volume = "16", number = "5s", pages = "168:1--168:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126517", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "A timing enforcer is a scheduler that not only allocates CPU cycles to threads, but also uses timers to enforce time budgets. An approach for verifying safety properties of timing enforcers at the source code level is presented. We assume that the enforcer is implemented as a set of ``enforcer'' functions that are executed atomically on critical system-level events, such as the arrival and departure of jobs, and triggering of timers. The key idea is to express the safety property as an invariant, and prove that it is inductive across all the enforcer functions. A formal semantics of timing enforcers is presented, including the semantics of functions used to read the system clock and set timers. Using this semantics, the verification approach is presented, and its soundness proved. Further, the approach also takes into consideration the periodicity of tasks. It is validated by proving the correctness of the enforcement of CPU cycle budgets for tasks by the Zero-Slack Rate Monotonic (ZSRM) scheduler, which is implemented in C as a Linux kernel module. The inductiveness of the necessary ZSRM invariants is proved by expressing them as function contracts using the ACSL specification language, and verifying the contracts using the Frama-C tool.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "168", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mehrabian:2017:TTL, author = "Mohammadreza Mehrabian and Mohammad Khayatian and Aviral Shrivastava and John C. Eidson and Patricia Derler and Hugo A. Andrade and Ya-Shian Li-Baboud and Edward Griffor and Marc Weiss and Kevin Stanton", title = "Timestamp Temporal Logic {(TTL)} for Testing the Timing of Cyber-Physical Systems", journal = j-TECS, volume = "16", number = "5s", pages = "169:1--169:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126510", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In order to test the performance and verify the correctness of Cyber-Physical Systems (CPS), the timing constraints on the system behavior must be met. Signal Temporal Logic (STL) can efficiently and succinctly capture the timing constraints of a given system model. However, many timing constraints on CPS are more naturally expressed in terms of events on signals. While it is possible to specify event-based timing constraints in STL, such statements can quickly become long and arcane in even simple systems. Timing constraints for CPS, which can be large and complex systems, are often associated with tolerances, the expression of which can make the timing constraints even more cumbersome using STL. This paper proposes a new logic, Timestamp Temporal Logic (TTL), to provide a definitional extension of STL that more intuitively expresses the timing constraints of distributed CPS. TTL also allows for a more natural expression of timing tolerances. Additionally, this paper outlines a methodology to automatically generate logic code and programs to monitor the expressed timing constraints. 
Since our TTL monitoring logic evaluates the timing constraints using only the timestamps of the required events on the signal, the TTL monitoring logic has significantly less memory footprint when compared to traditional STL monitoring logic, which stores the signal value at the required sampling frequency. The key contribution of this paper is a scalable approach for online monitoring of the timing constraints. We demonstrate the capabilities of TTL and our methodology for online monitoring of TTL constraints on two case studies: (1) Synchronization and phase control of two generators and, (2) Simultaneous image capture using distributed cameras for 3D image reconstruction.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "169", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Deshmukh:2017:TCP, author = "Jyotirmoy Deshmukh and Marko Horvat and Xiaoqing Jin and Rupak Majumdar and Vinayak S. Prabhu", title = "Testing Cyber-Physical Systems through {Bayesian} Optimization", journal = j-TECS, volume = "16", number = "5s", pages = "170:1--170:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126521", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Many problems in the design and analysis of cyber-physical systems (CPS) reduce to the following optimization problem: given a CPS which transforms continuous-time input traces in $ R^m $ to continuous-time output traces in $ R^n $ and a cost function over output traces, find an input trace which minimizes the cost. Cyber-physical systems are typically so complex that solving the optimization problem analytically by examining the system dynamics is not feasible.
We consider a black-box approach, where the optimization is performed by testing the input-output behaviour of the CPS. We provide a unified, tool-supported methodology for CPS testing and optimization. Our tool is the first CPS testing tool that supports Bayesian optimization. It is also the first to employ fully automated dimensionality reduction techniques. We demonstrate the potential of our tool by running experiments on multiple industrial case studies. We compare the effectiveness of Bayesian optimization to state-of-the-art testing techniques based on CMA-ES and Simulated Annealing.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "170", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Sun:2017:WHS, author = "Youcheng Sun and Marco {Di Natale}", title = "Weakly Hard Schedulability Analysis for Fixed Priority Scheduling of Periodic Real-Time Tasks", journal = j-TECS, volume = "16", number = "5s", pages = "171:1--171:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126497", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The hard deadline model is very popular in real-time research, but is representative or applicable to a small number of systems. Many applications, including control systems, are capable of tolerating occasional deadline misses, but are seriously compromised by a repeating pattern of late terminations. The weakly hard real-time model tries to capture these requirements by analyzing the conditions that guarantee that a maximum number of deadlines can be possibly missed in any set of consecutive activations. 
We provide a new weakly hard schedulability analysis method that applies to constrained-deadline periodic real-time systems scheduled with fixed priority and without knowledge of the task activation offsets. The analysis is based on a Mixed Integer Linear Programming (MILP) problem formulation; it is very general and can be adapted to include the consideration of resource sharing and activation jitter. A set of experiments conducted on an automotive engine control application and randomly generated tasksets show the applicability and accuracy of the proposed technique.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "171", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Schlatow:2017:RTA, author = "Johannes Schlatow and Rolf Ernst", title = "Response-Time Analysis for Task Chains with Complex Precedence and Blocking Relations", journal = j-TECS, volume = "16", number = "5s", pages = "172:1--172:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126505", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "For the development of complex software systems, we often resort to component-based approaches that separate the different concerns, enhance verifiability and reusability, and for which microkernel-based implementations are a good fit to enforce these concepts. Composing such a system of several interacting software components will, however, lead to complex precedence and blocking relations, which must be taken into account when performing latency analysis. When modelling these systems by classical task graphs, some of these effects are obfuscated and tend to render such an analysis either overly pessimistic or even optimistic. 
We therefore firstly present a novel task (meta-)model that is more expressive and accurate w.r.t. these (functional) precedence and mutual blocking relations. Secondly, we apply the busy-window approach and formulate a modular response-time analysis on task-chain level suitable but not restricted to static-priority scheduled systems. We show that the conjunction of both concepts allows the calculation of reasonably tight latency bounds for scenarios not adequately covered by related work.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "172", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kurtin:2017:ART, author = "Philip S. Kurtin and Marco J. G. Bekooij", title = "An Abstraction-Refinement Theory for the Analysis and Design of Real-Time Systems", journal = j-TECS, volume = "16", number = "5s", pages = "173:1--173:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126507", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Component-based and model-based reasonings are key concepts to address the increasing complexity of real-time systems. Bounding abstraction theories allow to create efficiently analyzable models that can be used to give temporal or functional guarantees on non-deterministic and non-monotone implementations. Likewise, bounding refinement theories allow to create implementations that adhere to temporal or functional properties of specification models. For systems in which jitter plays a major role, both best-case and worst-case bounding models are needed. In this paper we present a bounding abstraction-refinement theory for real-time systems. 
Compared to the state-of-the-art TETB refinement theory, our theory is less restrictive with respect to the automatic lifting of properties from component to graph level and does not only support temporal worst-case refinement, but evenhandedly temporal and functional, best-case and worst-case abstraction and refinement.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "173", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Azimi:2017:HHF, author = "Iman Azimi and Arman Anzanpour and Amir M. Rahmani and Tapio Pahikkala and Marco Levorato and Pasi Liljeberg and Nikil Dutt", title = "{HiCH}: Hierarchical Fog-Assisted Computing Architecture for Healthcare {IoT}", journal = j-TECS, volume = "16", number = "5s", pages = "174:1--174:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126501", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The Internet of Things (IoT) paradigm holds significant promises for remote health monitoring systems. Due to their life- or mission-critical nature, these systems need to provide a high level of availability and accuracy. On the one hand, centralized cloud-based IoT systems lack reliability, punctuality and availability (e.g., in case of slow or unreliable Internet connection), and on the other hand, fully outsourcing data analytics to the edge of the network can result in diminished level of accuracy and adaptability due to the limited computational capacity in edge nodes. In this paper, we tackle these issues by proposing a hierarchical computing architecture, HiCH, for IoT-based health monitoring systems. 
The core components of the proposed system are (1) a novel computing architecture suitable for hierarchical partitioning and execution of machine learning based data analytics, (2) a closed-loop management technique capable of autonomous system adjustments with respect to patient's condition. HiCH benefits from the features offered by both fog and cloud computing and introduces a tailored management methodology for healthcare IoT systems. We demonstrate the efficacy of HiCH via a comprehensive performance assessment and evaluation on a continuous remote health monitoring case study focusing on arrhythmia detection for patients suffering from CardioVascular Diseases (CVDs).", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "174", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhao:2017:ORT, author = "Yecheng Zhao and Chao Peng and Haibo Zeng and Zonghua Gu", title = "Optimization of Real-Time Software Implementing Multi-Rate Synchronous Finite State Machines", journal = j-TECS, volume = "16", number = "5s", pages = "175:1--175:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126515", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Model-based design using Synchronous Reactive (SR) models is becoming widespread for control software development in industry. However, software synthesis is challenging for multi-rate SR models consisting of blocks modeled with finite state machines, due to the complexity of validating the system's real-time schedulability. The existing approach uses the simplified periodic task model to allow an efficient schedulability analysis, which leads to pessimistic and suboptimal solutions. 
Instead, in this paper, we adopt a more accurate but more complex schedulability analysis. We develop several optimization techniques to improve the algorithm's efficiency. Experimental results on synthetic systems and an industrial case study show that the proposed optimization framework preserves the solution optimality but is much faster (e.g., $ 1000 \times $ for systems with 15 blocks) than the branch-and-bound algorithm, and it generates better control software than the existing approach.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "175", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Bourke:2017:SLS, author = "Timothy Bourke and Francois Carcenac and Jean-Louis Cola{\c{c}}o and Bruno Pagano and C{\'e}dric Pasteur and Marc Pouzet", title = "A Synchronous Look at the {Simulink} Standard Library", journal = j-TECS, volume = "16", number = "5s", pages = "176:1--176:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126516", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/matlab.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Hybrid systems modelers like Simulink come with a rich collection of discrete-time and continuous-time blocks. Most blocks are not defined in terms of more elementary ones --- and some cannot be --- but are instead written in imperative code and explained informally in a reference manual. This raises the question of defining a minimal set of orthogonal programming constructs such that most blocks can be programmed directly and thereby given a specification that is mathematically precise, and whose compiled version performs comparably to handwritten code.
In this paper, we show that a fairly large set of blocks of a standard library like the one provided by Simulink can be programmed in a precise, purely functional language using stream equations, hierarchical automata, Ordinary Differential Equations (ODEs), and deterministic synchronous parallel composition. Some blocks cannot be expressed in our setting as they mix discrete-time and continuous-time signals in unprincipled ways that are statically forbidden by the type checker. The experiment is conducted in Z{\'e}lus, a synchronous language that conservatively extends Lustre with ODEs to program systems that mix discrete-time and continuous-time signals.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "176", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wang:2017:TAS, author = "Jiajie Wang and Michael Mendler and Partha Roop and Bruno Bodin", title = "Timing Analysis of Synchronous Programs using {WCRT} Algebra: Scalability through Abstraction", journal = j-TECS, volume = "16", number = "5s", pages = "177:1--177:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126520", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Synchronous languages are ideal for designing safety-critical systems. Static Worst-Case Reaction Time (WCRT) analysis is an essential component in the design flow that ensures the real-time requirements are met. There are a few approaches for WCRT analysis, and the most versatile of all is explicit path enumeration. However, as synchronous programs are highly concurrent, techniques based on this approach, such as model checking, suffer from state explosion as the number of threads increases.
One observation on this problem is that these existing techniques analyse the program by enumerating a functionally equivalent automaton while WCRT is a non-functional property. This mismatch potentially causes algorithm-induced state explosion. In this paper, we propose a WCRT analysis technique based on the notion of timing equivalence, expressed using WCRT algebra. WCRT algebra can effectively capture the timing behaviour of a synchronous program by converting its intermediate representation Timed Concurrent Control Flow Graph (TCCFG) into a Tick Cost Automaton (TCA), a minimal automaton that is timing equivalent to the original program. Then the WCRT is computed over the TCA. We have implemented our approach and benchmarked it against state-of-the-art WCRT analysis techniques. The results show that the WCRT algebra is 3.5 times faster on average than the fastest published technique.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "177", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Pinisetty:2017:REC, author = "Srinivas Pinisetty and Partha S. Roop and Steven Smyth and Nathan Allen and Stavros Tripakis and Reinhard {Von Hanxleden}", title = "Runtime Enforcement of Cyber-Physical Systems", journal = j-TECS, volume = "16", number = "5s", pages = "178:1--178:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126500", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Many implantable medical devices, such as pacemakers, have been recalled due to failure of their embedded software. This motivates rethinking their design and certification processes. We propose, for the first time, an additional layer of safety by formalising the problem of run-time enforcement of implantable pacemakers. 
While recent work has formalised run-time enforcement of reactive systems, the proposed framework generalises existing work along the following directions: (1) we develop bi-directional enforcement, where the enforced policies depend not only on the status of the pacemaker (the controller) but also of the heart (the plant), thus formalising the run-time enforcement problem for cyber-physical systems, (2) we express policies using a variant of discrete timed automata (DTA), which can cover all regular properties unlike earlier frameworks limited to safety properties, (3) we are able to ensure the timing safety of implantable devices through the proposed enforcement, and (4) we show that the DTA-based approach is efficient relative to its dense time variant while ensuring that the discretisation error is relatively small and bounded. The developed approach is validated through a prototype system implemented using the open source KIELER framework. The experiments show that the framework incurs minimal runtime overhead.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "178", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Liu:2017:BEB, author = "Qingrui Liu and Xiaolong Wu and Larry Kittinger and Markus Levy and Changhee Jung", title = "{BenchPrime}: Effective Building of a Hybrid Benchmark Suite", journal = j-TECS, volume = "16", number = "5s", pages = "179:1--179:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126499", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This paper presents BenchPrime, an automated benchmark analysis toolset that is systematic and extensible to analyze the similarity and diversity of benchmark suites.
BenchPrime takes multiple benchmark suites and their evaluation metrics as inputs and generates a hybrid benchmark suite comprising only essential applications. Unlike prior work, BenchPrime uses linear discriminant analysis rather than principal component analysis, as well as selects the best clustering algorithm and the optimized number of clusters in an automated and metric-tailored way, thereby achieving high accuracy. In addition, BenchPrime ranks the benchmark suites in terms of their application set diversity and estimates how unique each benchmark suite is compared to other suites. As a case study, this work for the first time compares the DenBench with the MediaBench and MiBench using four different metrics to provide a multi-dimensional understanding of the benchmark suites. For each metric, BenchPrime measures to what degree DenBench applications are irreplaceable with those in MediaBench and MiBench. This provides means for identifying an essential subset from the three benchmark suites without compromising the application balance of the full set. The experimental results show that the necessity of including DenBench applications varies across the target metrics and that significant redundancy exists among the three benchmark suites.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "179", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Schuster:2017:DSE, author = "Simon Schuster and Peter Ulbrich and Isabella Stilkerich and Christian Dietrich and Wolfgang Schr{\"o}der-Preikschat", title = "Demystifying Soft-Error Mitigation by Control-Flow Checking --- A New Perspective on its Effectiveness", journal = j-TECS, volume = "16", number = "5s", pages = "180:1--180:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126503", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Soft errors are a challenging and urging problem in the domain of safety-critical embedded systems. For decades, checking schemes have been investigated and improved to mitigate soft-error effects for the class of control-flow faults, with current industrial standards strongly recommending their use. However, reality looks different: Taking a systems perspective, we implemented four representative Control-Flow Checking (CFC) schemes and put them through their paces in 396 fault-injection campaigns. In contrast to previous work, which typically relied on probability-based vulnerability metrics, we accounted for the influence of memory and time overheads on the fault-space dimensions and applied those in full-scan fault injections. This change in procedure alone severely degraded the perceived effectiveness of CFC. In addition, we expanded the perspective to data-flow faults and their influence on the overall susceptibility, an aspect that so far has been largely ignored. Our results suggest that, without accompanying measures, any improvement regarding control-flow faults is dominated by the increase in data faults caused by the increased attack surface in terms of memory and runtime overhead.
Moreover, CFC performance less depended on the detection capabilities than on general aspects of the concrete binary compilation and execution. In conclusion, incorporating CFC is not as straightforward as often assumed and the vulnerability of systems with hardened control-flow may in many cases even be increased by the schemes themselves.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "180", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Shresthamali:2017:APM, author = "Shaswot Shresthamali and Masaaki Kondo and Hiroshi Nakamura", title = "Adaptive Power Management in Solar Energy Harvesting Sensor Node Using Reinforcement Learning", journal = j-TECS, volume = "16", number = "5s", pages = "181:1--181:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126495", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this paper, we present an adaptive power manager for solar energy harvesting sensor nodes. We use a simplified model consisting of a solar panel, an ideal battery and a general sensor node with variable duty cycle. Our power manager uses Reinforcement Learning (RL), specifically SARSA($ \lambda $) learning, to train itself from historical data. Once trained, we show that our power manager is capable of adapting to changes in weather, climate, device parameters and battery degradation while ensuring near-optimal performance without depleting or overcharging its battery. Our approach uses a simple but novel general reward function and leverages the use of weather forecast data to enhance performance. 
We show that our method achieves near perfect energy neutral operation (ENO) with less than 6\% root mean square deviation from ENO as compared to more than 23\% deviation that occur when using other approaches.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "181", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kim:2017:AAS, author = "Sang-Hoon Kim and Jinkyu Jeong and Jin-Soo Kim", title = "Application-Aware Swapping for Mobile Systems", journal = j-TECS, volume = "16", number = "5s", pages = "182:1--182:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126509", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "There has been a constant demand for memory in modern mobile systems to provide users with better experience. Swapping is one of the cost-effective software solutions to provide extra usable memory by reclaiming inactive pages and improving memory utilization. However, swapping has not been actively adopted to mobile systems since it incurs a significant amount of I/O, which in fact impairs system performance as well as user experience. In this paper, we propose a novel scheme to properly harness the swapping to mobile systems. We identify that a vast amount of I/O for swapping comes from the conflict of the traditional page-level approach of the swapping and the process-level memory management scheme tailored to mobile systems. Moreover, we find out that the current victim page selection policy is not effective due to the process-level policy. To address these problems, we revise the victim selection policy to resolve the conflict and to selectively perform swapping according to the efficacy of swapping. 
Evaluation using a running prototype with realistic workloads indicates that the proposed scheme effectively reduces the paging traffic, thereby improving user experience as well as energy consumption.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "182", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ji:2017:LDC, author = "Cheng Ji and Li-Pin Chang and Liang Shi and Congming Gao and Chao Wu and Yuangang Wang and Chun Jason Xue", title = "Lightweight Data Compression for Mobile Flash Storage", journal = j-TECS, volume = "16", number = "5s", pages = "183:1--183:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126511", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/datacompression.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Data compression is beneficial to flash storage lifespan. However, because the design of mobile flash storage is highly cost-sensitive, hardware compression becomes a less attractive option. This study investigates the feasibility of data compression on mobile flash storage. It first characterizes data compressibility based on mobile apps, and the analysis shows that write traffic bound for mobile storage volumes is highly compressible. Based on this finding, a lightweight approach is introduced for firmware-based data compression in mobile flash storage. The controller and flash module work in a pipelined fashion to hide the data compression overhead. Together with this pipelined design, the proposed approach selectively compresses incoming data of high compressibility, while leaving data of low compressibility to a compression-aware garbage collector.
Experimental results show that our approach greatly reduced the frequency of block erase by 50.5\% compared to uncompressed flash storage. Compared to unconditional data compression, our approach improved the write latency by 10.4\% at a marginal cost of 4\% more block erase operations.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "183", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Medhat:2017:MPE, author = "Ramy Medhat and Michael O. Lam and Barry L. Rountree and Borzoo Bonakdarpour and Sebastian Fischmeister", title = "Managing the Performance\slash Error Tradeoff of Floating-point Intensive Applications", journal = j-TECS, volume = "16", number = "5s", pages = "184:1--184:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126519", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Modern embedded systems are becoming more reliant on real-valued arithmetic as they employ mathematically complex vision algorithms and sensor signal processing. Double-precision floating point is the most commonly used precision in computer vision algorithm implementations. A single-precision floating point can provide a performance boost due to less memory transfers, less cache occupancy, and relatively faster mathematical operations on some architectures. However, adopting it can result in loss of accuracy. Identifying which parts of the program can run in single-precision floating point with low impact on error is a manual and tedious process. In this paper, we propose an automatic approach to identify parts of the program that have a low impact on error using shadow-value analysis. 
Our approach provides the user with a performance/error tradeoff, using which the user can decide how much accuracy can be sacrificed in return for performance improvement. We illustrate the impact of the approach using a well known implementation of Apriltag detection used in robotics vision. We demonstrate that an average 1.3x speedup can be achieved with no impact on tag detection, and a 1.7x speedup with only 4\% false negatives.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "184", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Sogokon:2017:OMP, author = "Andrew Sogokon and Khalil Ghorbal and Taylor T. Johnson", title = "Operational Models for Piecewise-Smooth Systems", journal = j-TECS, volume = "16", number = "5s", pages = "185:1--185:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126506", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this article we study ways of constructing meaningful operational models of piecewise-smooth systems (PWS). The systems we consider are described by polynomial vector fields defined on non-overlapping semi-algebraic sets, which form a partition of the state space. Our approach is to give meaning to motion in systems of this type by automatically synthesizing operational models in the form of hybrid automata (HA). Despite appearances, it is in practice often difficult to arrive at satisfactory HA models of PWS. The different ways of building operational models that we explore in our approach can be thought of as defining different semantics for the underlying PWS. 
These differences have a number of interesting nuances related to phenomena such as chattering, non-determinism, so-called mythical modes and sliding behaviour.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "185", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Huang:2017:PSV, author = "Chao Huang and Xin Chen and Wang Lin and Zhengfeng Yang and Xuandong Li", title = "Probabilistic Safety Verification of Stochastic Hybrid Systems Using Barrier Certificates", journal = j-TECS, volume = "16", number = "5s", pages = "186:1--186:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126508", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The problem of probabilistic safety verification of stochastic hybrid systems is to check whether the probability that a given system will reach an unsafe region from certain initial states can be bounded by some given probability threshold. The paper considers stochastic hybrid systems where the behavior is governed by polynomial equalities and inequalities, as for usual hybrid systems, but the initial states follow some stochastic distributions. It proposes a new barrier certificate based method for probabilistic safety verification which guarantees the absolute safety in an infinite time horizon that is beyond the reach of existing techniques using either statistical model checking or probabilistic reachable set computation. It also gives a novel computational approach, by building and solving a constrained optimization problem coming from verification conditions of barrier certificates, to compute the lower bound on safety probabilities which can be compared with the given threshold.
Experimental evidence is provided demonstrating the applicability of our approach on several benchmarks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "186", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chen:2017:CRA, author = "Xin Chen and Sergio Mover and Sriram Sankaranarayanan", title = "Compositional Relational Abstraction for Nonlinear Hybrid Systems", journal = j-TECS, volume = "16", number = "5s", pages = "187:1--187:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126522", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We propose techniques to construct abstractions for nonlinear dynamics in terms of relations expressed in linear arithmetic. Such relations are useful for translating the closed loop verification problem of control software with continuous-time, nonlinear plant models into discrete and linear models that can be handled by efficient software verification approaches for discrete-time systems. We construct relations using Taylor model based flowpipe construction and the systematic composition of relational abstractions for smaller components. We focus on developing efficient schemes for the special case of composing abstractions for linear and nonlinear components. We implement our ideas using a relational abstraction system, using the resulting abstraction inside the verification tool NuXMV, which implements numerous SAT/SMT solver-based verification techniques for discrete systems. Finally, we evaluate the application of relational abstractions for verifying properties of time triggered controllers, comparing with the Flow* tool. 
We conclude that relational abstractions are a promising approach towards nonlinear hybrid system verification, capable of proving properties that are beyond the reach of tools such as Flow*. At the same time, we highlight the need for improvements to existing linear arithmetic SAT/SMT solvers to better support reasoning with large relational abstractions.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "187", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lesi:2017:SAS, author = "Vuk Lesi and Ilija Jovanov and Miroslav Pajic", title = "Security-Aware Scheduling of Embedded Control Tasks", journal = j-TECS, volume = "16", number = "5s", pages = "188:1--188:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126518", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this work, we focus on securing cyber-physical systems (CPS) in the presence of network-based attacks, such as Man-in-the-Middle (MitM) attacks, where a stealthy attacker is able to compromise communication between system sensors and controllers. Standard methods for this type of attacks rely on the use of cryptographic mechanisms, such as Message Authentication Codes (MACs) to ensure data integrity. However, this approach incurs significant computation overhead, limiting its use in resource constrained systems. Consequently, we consider the problem of scheduling multiple control tasks on a shared processor while providing a suitable level of security guarantees. Specifically, by security guarantees we refer to control performance, i.e., Quality-of-Control (QoC), in the presence of attacks. 
We start by mapping requirements for QoC under attack into constraints for security-aware control tasks that, besides standard control operations, intermittently perform data authentication. This allows for the analysis of the impact that security-related computation overhead has on both schedulability of control tasks and QoC. Building on this analysis, we introduce a mixed-integer linear programming-based technique to obtain a schedulable task set with predefined QoC requirements. Also, to facilitate optimal resource allocation, we provide a method to analyze interplay between available computational resources and the overall QoC under attack, and show how to obtain a schedulable task set that maximizes the overall QoC guarantees. Finally, we prove usability of our approach on a case study with multiple automotive control components.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "188", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ghosh:2017:SMP, author = "Sumana Ghosh and Souradeep Dutta and Soumyajit Dey and Pallab Dasgupta", title = "A Structured Methodology for Pattern based Adaptive Scheduling in Embedded Control", journal = j-TECS, volume = "16", number = "5s", pages = "189:1--189:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126514", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Software implementation of multiple embedded control loops often share compute resources. The control performance of such implementations have been shown to improve if the sharing of bandwidth between control loops can be dynamically regulated in response to input disturbances. 
In the absence of a structured methodology for planning such measures, the scheduler may spend too much time in deciding the optimal scheduling pattern. Our work leverages well known results in the domain of network control systems and applies them in the context of bandwidth sharing among controllers. We provide techniques that may be used a priori for computing co-schedulable execution patterns for a given set of control loops such that stability is guaranteed under all possible disturbance scenarios. Additionally, the design of the control loops optimize the average case control performance by adaptive sharing of bandwidth under time varying input disturbances.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "189", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Gavran:2017:AMR, author = "Ivan Gavran and Rupak Majumdar and Indranil Saha", title = "{Antlab}: a Multi-Robot Task Server", journal = j-TECS, volume = "16", number = "5s", pages = "190:1--190:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126513", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/python.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We present Antlab, an end-to-end system that takes streams of user task requests and executes them using collections of robots. In Antlab, each request is specified declaratively in linear temporal logic extended with quantifiers over robots. The user does not program robots individually, nor know how many robots are available at any time or the precise state of the robots. The Antlab runtime system manages the set of robots, schedules robots to perform tasks, automatically synthesizes robot motion plans from the task specification, and manages the co-ordinated execution of the plan. 
We provide a constraint-based formulation for simultaneous task assignment and plan generation for multiple robots working together to satisfy a task specification. In order to scalably handle multiple concurrent tasks, we take a separation of concerns view to plan generation. First, we solve each planning problem in isolation, with an ``ideal world'' hypothesis that says there are no unspecified dynamic obstacles or adversarial environment actions. Second, to deal with imprecisions of the real world, we implement the plans in receding horizon fashion on top of a standard robot navigation stack. The motion planner dynamically detects environment actions or dynamic obstacles from the environment or from other robots and locally corrects the ideal planned path. It triggers a re-planning step dynamically if the current path deviates from the planned path or if planner assumptions are violated. We have implemented Antlab as a C++ and Python library on top of robots running on ROS, using SMT-based and AI planning-based implementations for task and path planning. We evaluated Antlab both in simulation as well as on a set of TurtleBot robots. We demonstrate that it can provide a scalable and robust infrastructure for declarative multi-robot programming.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "190", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Shukla:2018:ETS, author = "Sandeep K. 
Shukla", title = "Editorial: Trust and Security Must Become a Primary Design Concern in Embedded Computing", journal = j-TECS, volume = "17", number = "1", pages = "1:1--1:??", month = jan, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3173385", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "1", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chen:2018:GEA, author = "Jiming Chen and Yu (Jason) Gu and Gil Zussman", title = "Guest Editorial for {ACM TECS}: Special Issue on Autonomous Battery-Free Sensing and Communication", journal = j-TECS, volume = "17", number = "1", pages = "2:1--2:??", month = jan, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3127494", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "2", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chen:2018:HEW, author = "Qi Chen and Ye Liu and Guangchi Liu and Qing Yang and Xianming Shi and Hongwei Gao and Lu Su and Quanlong Li", title = "Harvest Energy from the Water: a Self-Sustained Wireless Water Quality Sensing System", journal = j-TECS, volume = "17", number = "1", pages = "3:1--3:??", month = jan, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3047646", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Water quality data is incredibly important and valuable, but its acquisition is not always trivial. A promising solution is to distribute a wireless sensor network in water to measure and collect the data; however, a drawback exists in that the batteries of the system must be replaced or recharged after being exhausted. To mitigate this issue, we designed a self-sustained water quality sensing system that is powered by renewable bioenergy generated from microbial fuel cells (MFCs). MFCs collect the energy released from native magnesium oxidizing microorganisms (MOMs) that are abundant in natural waters. The proposed energy-harvesting technology is environmentally friendly and can provide maintenance-free power to sensors for several years. Despite these benefits, an MFC can only provide microwatt-level power that is not sufficient to continuously power a sensor. To address this issue, we designed a power management module to accumulate energy when the input voltage is as low as 0.33V. We also proposed a radio-frequency (RF) activation technique to remotely activate sensors that otherwise are switched off in default. With this innovative technique, a sensor's energy consumption in sleep mode can be completely avoided. 
Additionally, this design can enable on-demand data acquisitions from sensors. We implement the proposed system and evaluate its performance in a stream. In 3-month field experiments, we find the system is able to reliably collect water quality data and is robust to environment changes.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "3", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Gomez:2018:ELT, author = "Andres Gomez and Lukas Sigrist and Thomas Schalch and Luca Benini and Lothar Thiele", title = "Efficient, Long-Term Logging of Rich Data Sensors Using Transient Sensor Nodes", journal = j-TECS, volume = "17", number = "1", pages = "4:1--4:??", month = jan, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3047499", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "While energy harvesting is generally seen to be the key to power cyber-physical systems in a low-cost, long-term, efficient manner, it has generally required large energy storage devices to mitigate the effects of the source's variability. The emerging class of transiently powered systems embrace this variability by performing computation in proportion to the energy harvested, thereby minimizing the obtrusive and expensive storage element. By using an efficient Energy Management Unit (EMU), small bursts of energy can be buffered in an optimally sized capacitor and used to supply generic loads, even when the average harvested power is only a fraction of that required for sustained system operation. Dynamic Energy Burst Scaling (DEBS) can be used by the load to dynamically configure the EMU to supply small bursts of energy at its optimal power point, independent from the harvester's operating point. 
Parameters like the maximum burst size, the solar panel's area, as well as the use of energy-efficient Non-Volatile Memory Hierarchy (NVMH) can have a significant impact on the transient system's characteristics such as the wake-up time and the amount of work that can be done per unit of energy. Experimental data from a solar-powered, long-term autonomous image acquisition application show that, regardless of its configuration, the EMU can supply energy bursts to a 43.4mW load with efficiencies of up to 79.7\% and can work with input power levels as low as 140 $ \mu $W. When the EMU is configured to use DEBS and NVMH, the total energy cost of acquiring, processing and storing an image can be reduced by 77.8\%, at the price of increasing the energy buffer size by 65\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "4", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wang:2018:TAB, author = "Zejue Wang and Hongjia Li and Dan Hu and Song Ci", title = "Transmission Adaptation for Battery-Free Relaying", journal = j-TECS, volume = "17", number = "1", pages = "5:1--5:??", month = jan, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3055513", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Energy harvesting (EH)-enabled relaying has attracted considerable attention as an effective way to prolong the operation time of energy-constrained networks and extend coverage beside desired survivability and rate of transmission. In related literature, the Harvest-Store-Use (HSU) model is usually utilized to describe the energy flow behavior of the EH system. 
However, the half-duplex (HD) constraint of HSU that harvested energy can only be used after being temporally stored in energy buffer may reduce effective transmission time. Thus, we first construct the full-duplex (FD) energy flow behavior model of the EH system where the harvested energy can be tuned to power load and being stored simultaneously. The FD model is then proved to be equivalent with the HSU model when time interval is small enough. Considering some key physical variabilities, for example, the wireless channel and the amount of harvested energy, the transmission adaptation problem for multiple relays embedded with FD EH systems is formulated with the objective to improve the utilization of the harvested energy. We tackle the problem by using a centralized optimization algorithm by jointly tuning the factors, including power control for source and relay nodes, relay selection and dynamic switching among four relay transmission mode, namely HD amplify-and-forward (AF), HD decode-and-forward (DF), FD AF, and FD DF. The centralized optimization algorithm is proposed on the basis of dual decomposition and serves as a benchmark. To enable relays to individually make their own decisions, a distributed algorithm with relatively higher complexity is given by using consensus optimization in conjunction with the alternating direction method of multipliers, and a sub-optimal algorithm with low complexity is provided. The proposed algorithms are shown to have good performance via simulations for a range of different EH rates and prediction errors.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "5", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wang:2018:STW, author = "Zhongqin Wang and Fu Xiao and Ning Ye and Ruchuan Wang and Panlong Yang", title = "A See-through-Wall System for Device-Free Human Motion Sensing Based on Battery-Free {RFID}", journal = j-TECS, volume = "17", number = "1", pages = "6:1--6:??", month = jan, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3055515", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "A see-through-wall system can be used in life detection, military fields, elderly people surveillance, and gaming. The existing systems are mainly based on military devices, customized signals or pre-deployed sensors inside the room, which are very expensive and inaccessible for general use. Recently, a low-cost RFID technology has gained a lot of attention in this field. Since phase estimates of a battery-free RFID tag collected by a commercial off-the-shelf (COTS) RFID reader are sensitive to external interference, the RFID tag could be regarded as a battery-free sensor that detects reflections off targeted objects. The existing RFID-based system, however, needs to first learn the environment of the empty room beforehand to separate reflections off the tracked target. Besides, it can only track low-speed metal objects with high-positioning accuracy. Since the human body with its complex surface has a weaker ability to reflect radio frequency (RF) signals than metal objects, a battery-free RFID tag can capture only a subset of the reflections off the human body. To address these challenges, a RFID-based human motion sensing technology, called RF-HMS, is presented to track device-free human motion through walls.
At first, we construct transfer functions of multipath channel based on phase and RSSI measurements to eliminate device noise and reflections off static objects like walls and furniture without learning the environment of the empty room before. Then a tag planar array is grouped by many battery-free RFID tags to improve the sensing performance. RF-HMS combines reflections from each RFID tag into a reinforced result. On this basis, we extract phase shifts to detect the absence or presence of any moving persons and further derive the reflections off a single moving person to identify his/her forward or backward motion direction. The results show that RF-HMS can effectively detect the absence or presence of moving persons with 100\% accuracy and keep a high accuracy of more than 90\% to track human motion directions.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "6", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lin:2018:OOP, author = "Chi Lin and Yanhong Zhou and Houbing Song and Chang Wu Yu and Guowei Wu", title = "{OPPC}: an Optimal Path Planning Charging Scheme Based on Schedulability Evaluation for {WRSNs}", journal = j-TECS, volume = "17", number = "1", pages = "7:1--7:??", month = jan, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3126684", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The lack of schedulability evaluation of previous charging schemes in wireless rechargeable sensor networks (WRSNs) degrades the charging efficiency, leading to node exhaustion. We propose an Optimal Path Planning Charging scheme, namely OPPC, for the on-demand charging architecture. OPPC evaluates the schedulability of a charging mission, which makes charging scheduling predictable. 
It provides an optimal charging path which maximizes charging efficiency. When confronted with a non-schedulable charging mission, a node discarding algorithm is developed to enable the schedulability. Experimental simulations demonstrate that OPPC can achieve better performance in successful charging rate as well as charging efficiency.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "7", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hu:2018:JOS, author = "Hang Hu and Hang Zhang and Jianxin Guo and Feng Wang", title = "Joint Optimization of Sensing and Power Allocation in Energy-Harvesting Cognitive Radio Networks", journal = j-TECS, volume = "17", number = "1", pages = "8:1--8:??", month = jan, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3070709", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The energy-harvesting cognitive radio (CR) network is proposed to improve the spectrum efficiency and energy efficiency. We focus on the optimization of sensing time and power allocation to maximize the throughput of the energy-harvesting CR network subject to the energy causality constraint and collision constraint. Based on the classification of operating regions, the optimization problem is divided into two sub-problems. Then, the efficient iterative Algorithm 1 and Algorithm 2 are proposed to solve sub-problem (A) and sub-problem (B), respectively. Numerical results show that a significant improvement in the throughput is achieved via joint optimization of sensing time and power allocation.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "8", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wu:2018:RRA, author = "Die Wu and Li Lu and Muhammad Jawad Hussain and Songfan Li and Mo Li and Fengli Zhang", title = "{$ R^3 $}: Reliable Over-the-Air Reprogramming on Computational {RFIDs}", journal = j-TECS, volume = "17", number = "1", pages = "9:1--9:??", month = jan, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3070720", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Computational Radio Frequency Identification (CRFID) tags operate solely on harvested energy and have emerged as viable platforms for a variety of ubiquitous sensing and computation applications. Due to their battery-less nature, these tags can be permanently deployed in hard-to-reach places where the possibility of tag access is eliminated. In such scenarios, maintaining and upgrading the tag's firmware becomes infeasible because programming tools, including wired interface and PC-based software, are required to erase, modify, or reprogram the microcontroller unit's memory. Such limitations necessitate the demand for an over-the-air (OTA) scheme, which can wirelessly reprogram or upgrade the firmware in CRFID tags. In this article, we present $ R^3 $ --- a reliable OTA reprogramming scheme that is compliant with EPC protocol and requires no hardware upgrade to RFID reader or CRFID tag. We demonstrate our scheme on three platforms, which include both software-defined as well as chip-based CRFID tags, that is, WISP5.1 and Optimized WISP (Opt-WISP), and Spider tag, respectively. The selection also includes both the FLASH- and FRAM-based microcontrollers. 
We extensively evaluate our scheme in terms of several metrics, including overall system delay, time and energy overhead, and success rate in line with interrogation range. We foresee our endeavor to offer the viability of OTA reprogramming and firmware upgrade for CRFID tokens under practical situations.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "9", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Li:2018:NOC, author = "Songyuan Li and Lingkun Fu and Shibo He and Youxian Sun", title = "Near-Optimal Co-Deployment of Chargers and Sink Stations in Rechargeable Sensor Networks", journal = j-TECS, volume = "17", number = "1", pages = "10:1--10:??", month = jan, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3070721", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Wireless charging technology has drawn great attention of both academia and industry in recent years, due to its potential of significantly improving the system performance of sensor networks. The emergence of an open-source experimental platform for wireless rechargeable sensor networks, Powercast, has made the theoretical research closer to reality. This pioneering platform is able to recharge sensor nodes much more efficiently and allows different communication protocols to be implemented upon users' demands. Different from the RFID-based model widely used in the existing works, Powercast designs the charger and sink station separately. This leads to a new design challenge of cooperatively deploying minimum number of chargers and sink stations in wireless rechargeable sensor networks. 
Such a co-deployment issue is extremely challenging, since the deployments of chargers and sink stations are coupled, and each subproblem is known to be NP-hard. The key to the design is to understand the intrinsic relationship between data flow and energy flow, which is interdependent. In this article, we tackle this challenge by dividing it into two subproblems and optimizing charger and sink station deployment iteratively. Specifically, we first transform each subproblem to a max-flow problem. With this, we are able to select chargers or sink stations according to their contributions to the total flow rate. We design greedy-based algorithms with a guaranteed worst-case bound $ \ln R / \xi $ for the subproblems of charger deployment and sink station deployment, respectively. Further, we address the original problem by designing an iterative algorithm that solves two subproblems alternatively to achieve a near optimal performance. We corroborate our analysis by extensive simulations under practical coefficient settings and demonstrate the advantage of the proposed algorithm.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "10", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wagemann:2018:OEN, author = "Peter W{\"a}gemann and Tobias Distler and Heiko Janker and Phillip Raffeck and Volkmar Sieh and Wolfgang Schr{\"o}der-Preikschat", title = "Operating Energy-Neutral Real-Time Systems", journal = j-TECS, volume = "17", number = "1", pages = "11:1--11:??", month = jan, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3078631", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Energy-neutral real-time systems harvest the entire energy they use from their environment. 
In such systems, energy must be treated as an equally important resource as time, which creates the need to solve a number of problems that so far have not been addressed by traditional real-time systems. In particular, this includes the scheduling of tasks with both time and energy constraints, the monitoring of energy budgets, as well as the survival of blackout periods during which not enough energy is available to keep the system fully operational. In this article, we address these issues presenting EnOS, an operating-system kernel for energy-neutral real-time systems. EnOS considers mixed time criticality levels for different energy criticality modes, which enables a decoupling of time and energy constraints when one is considered less critical than the other. When switching the energy criticality mode, the system also changes the set of executed tasks and is therefore able to dynamically adapt its energy consumption depending on external conditions. By keeping track of the energy budget available, EnOS ensures that in case of a blackout the system state is safely stored to persistent memory, allowing operations to resume at a later point when enough energy is harvested again.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "11", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Rajib:2018:PRI, author = "MD.
Majharul Islam Rajib and Asis Nasipuri", title = "Predictive Retransmissions for Intermittently Connected Sensor Networks with Transmission Diversity", journal = j-TECS, volume = "17", number = "1", pages = "12:1--12:??", month = jan, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3092947", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Batteryless wireless sensor networks that rely on energy harvested from the environment often exhibit random power outages due to limitations of energy resources, which give rise to intermittent connectivity and long transmission delays. To improve the delay performance in such networks, we consider a design strategy that uses predictive retransmissions to maximize the probability of success for each transmission. This is applied to two different transmission diversity schemes: cooperative relaying over unicast routes and opportunistic routing. Performance evaluations from theoretical models and simulations are presented that show that significant gains can be achieved using the proposed approach in such networks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "12", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Xu:2018:GEP, author = "Chi Xu and Wei Liang and Haibin Yu", title = "Green-Energy-Powered Cognitive Radio Networks: Joint Time and Power Allocation", journal = j-TECS, volume = "17", number = "1", pages = "13:1--13:??", month = jan, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3092949", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article studies a green-energy-powered cognitive radio network (GCRN) in an underlay paradigm, wherein multiple battery-free secondary users (SUs) capture both the spectrum and the energy of primary users (PUs) to communicate with an access point (AP). By time division multiple access, each SU transmits data to AP in the allocated time and harvests energy from the RF signals of PUs otherwise, all in the same licensed spectrum concurrently with PUs. Thus, the transmit power of each SU is jointly constrained by the peak interference power at PU and the harvested energy of SU. With the formulated green coexistence paradigm, we investigate the sum-throughput maximization problem with respect to time and power allocation, which is non-convex. To obtain the optimal resource allocation, we propose a joint optimal time and power allocation (JOTPA) algorithm that first transforms the original problem into a convex optimization problem with respect to time and energy allocation, and then solve it by iterative Lagrange dual decomposition. To comprehensively evaluate the performance of the GCRN with JOTPA, we deploy the GCRN in three typical scenarios and compare JOTPA with the equal time and optimal power allocation (ETOPA) algorithm. 
Extensive simulations show that the deployment of the GCRN significantly influences the throughput performance and JOTPA outperforms ETOPA under all considered scenarios.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "13", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Eles:2018:GES, author = "Petru Eles and J{\"o}rg Henkel", title = "Guest Editorial for the Special Issue of {ESWEEK 2016}", journal = j-TECS, volume = "17", number = "1", pages = "14:1--14:??", month = jan, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3152097", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "14", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hegde:2018:CAC, author = "Gopalakrishna Hegde and Siddhartha and Nachiket Kapre", title = "{CaffePresso}: Accelerating Convolutional Networks on Embedded {SoCs}", journal = j-TECS, volume = "17", number = "1", pages = "15:1--15:??", month = jan, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3105925", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Auto-tuning and parametric implementation of deep learning kernels allow off-the-shelf accelerator-based embedded platforms to deliver high-performance and energy-efficient mappings of the inference phase of lightweight neural networks. Low-complexity classifiers are characterized by operations on small image maps with two to three deep layers and few class labels. 
For these use cases, we consider a range of embedded systems with 20W power budgets such as the Xilinx ZC706 (FPGA), NVIDIA Jetson TX1 (GPU), TI Keystone II (DSP), and Adapteva Parallella (RISC+NoC). In CaffePresso, we combine auto-tuning of the implementation parameters, and platform-specific constraints deliver optimized solutions for each input ConvNet specification.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "15", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Tan:2018:LLP, author = "Cheng Tan and Aditi Kulkarni and Vanchinathan Venkataramani and Manupa Karunaratne and Tulika Mitra and Li-Shiuan Peh", title = "{LOCUS}: Low-Power Customizable Many-Core Architecture for Wearables", journal = j-TECS, volume = "17", number = "1", pages = "16:1--16:??", month = jan, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3122786", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Application requirements, such as real-time response, are pushing wearable devices to leverage more powerful processors inside the SoC (system on chip). However, existing wearable devices are not well suited for such challenging applications due to poor performance, and the conventional powerful many-core architectures are not appropriate either due to the stringent power budget in this domain. We propose LOCUS --- a low-power, customizable, many-core processor for next-generation wearable devices. LOCUS combines customizable processor cores with a customizable network on a message-passing architecture to deliver very competitive performance/watt --- an average $ 3.1 \times $ compared to quad-core ARM processors used in state-of-the-art wearable devices.
A combination of full system simulation with representative applications from the wearable domain and RTL synthesis of the architecture show that 16-core LOCUS achieves an average $ 1.52 \times $ performance/watt improvement over a conventional 16-core shared memory many-core architecture. A dynamic power management mechanism is proposed to further decrease the power consumption in both computation and communication, which improves the performance/watt of LOCUS by $ 1.17 \times $.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "16", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Sutar:2018:DPI, author = "Soubhagya Sutar and Arnab Raha and Devadatta Kulkarni and Rajeev Shorey and Jeffrey Tew and Vijay Raghunathan", title = "{D-PUF}: an Intrinsically Reconfigurable {DRAM PUF} for Device Authentication and Random Number Generation", journal = j-TECS, volume = "17", number = "1", pages = "17:1--17:??", month = jan, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3105915", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/prng.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Physically Unclonable Functions (PUFs) have proved to be an effective and low-cost measure against counterfeiting by providing device authentication and secure key storage services. Memory-based PUF implementations are an attractive option due to the ubiquitous nature of memory in electronic devices and the requirement of minimal (or no) additional circuitry. Dynamic Random Access Memory-- (DRAM) based PUFs are particularly advantageous due to their large address space and multiple controllable parameters during response generation. 
However, prior works on DRAM PUFs use a static response-generation mechanism making them vulnerable to security attacks. Further, they result in slow device authentication, are not applicable to commercial off-the-shelf devices, or require DRAM power cycling prior to authentication. In this article, we propose D-PUF, an intrinsically reconfigurable DRAM PUF based on the idea of DRAM refresh pausing. A key feature of the proposed DRAM PUF is reconfigurability, that is, by varying the DRAM refresh-pause interval, the challenge-response behavior of the PUF can be altered, making it robust to various attacks. The article is broadly divided into two parts. In the first part, we demonstrate the use of D-PUF in performing device authentication through a secure, low-overhead methodology. In the second part, we show the generation of true random numbers using D-PUF. The design is implemented and validated using an Altera Stratix IV GX FPGA-based Terasic TR4-230 development board and several off-the-shelf 1GB DDR3 DRAM modules. Our experimental results demonstrate a $ 4.3 \times $--$ 6.4 \times $ reduction in authentication time compared to prior work. Using controlled temperature and accelerated aging tests, we also demonstrate the robustness of our authentication mechanism to temperature variations and aging effects. Finally, the ability of the design to generate random numbers is verified using the NIST Statistical Test Suite.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "17", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Guo:2018:IWP, author = "Jie Guo and Chuhan Min and Tao Cai and Yiran Chen", title = "Improving Write Performance and Extending Endurance of Object-Based {NAND} Flash Devices", journal = j-TECS, volume = "17", number = "1", pages = "18:1--18:??", month = jan, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3105924", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Write amplification is a major cause of performance and endurance degradations in NAND flash-based storage systems. In an object-based NAND flash device (ONFD), two causes of write amplification are onode partial update and cascading update. Here, onode is a type of small-sized object metadata, and multiple onodes are stored in one NAND flash page. Updating one onode invokes partial page update (i.e., onode partial update), incurring unnecessary migration of the un-updated data. Cascading update denotes updating object metadata in a cascading manner due to object data update or migration. Although there are only several bytes that need to be updated in the object metadata, one or more pages have to be re-written accordingly. In this work, we propose a system design to alleviate the write amplification issue in the object-based NAND flash device. The proposed design includes (1) a multi-level garbage collection technique to minimize unnecessary data migration incurred by onode partial update and (2) a B+ table tree, Semantics-Aware Flexible (SAF) data layout, and selective cache design to reduce the write operations associated with cascading update. To guarantee system consistency, we also propose a power failure handling technique. 
Experiment results show that our proposed design can achieve up to 20\% write reduction compared to the best states of the art.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "18", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Maier:2018:FIT, author = "Petra R. Maier and Veit B. Kleeberger and Daniel Mueller-Gritschneder and Ulf Schlichtmann", title = "Fault Injection for Test-Driven Development of Robust {SoC} Firmware", journal = j-TECS, volume = "17", number = "1", pages = "19:1--19:??", month = jan, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3092943", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Robustness against errors in hardware must be considered from the very beginning of safety-critical system-on-chip firmware design. Therefore, we present fault injection for test-driven development (TDD) of robust firmware. As TDD is based on instant feedback to the designer, fault injection must execute within few minutes. In contrast to state-of-the-art approaches, we avoid long simulation scenarios and runtimes by injecting faults at the unit level and utilizing host-compiled simulation. Further, three static bit-level analyses of firmware source code and hardware specification reduce the fault set significantly. This accelerates fault injection by several orders of magnitude and enables robustness-aware TDD.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "19", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Balkan:2018:UFA, author = "Ayca Balkan and Paulo Tabuada and Jyotirmoy V. 
Deshmukh and Xiaoqing Jin and James Kapinski", title = "{Underminer}: a Framework for Automatically Identifying Nonconverging Behaviors in Black-Box System Models", journal = j-TECS, volume = "17", number = "1", pages = "20:1--20:??", month = jan, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3122787", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Evaluation of industrial embedded control system designs is a time-consuming and imperfect process. While an ideal process would apply a formal verification technique such as model checking or theorem proving, these techniques do not scale to industrial design problems, and it is often difficult to use these techniques to verify performance aspects of control system designs, such as stability or convergence. For industrial designs, engineers rely on testing processes to identify critical or unexpected behaviors. We propose a novel framework called Underminer to improve the testing process; this is an automated technique to identify nonconverging behaviors in embedded control system designs. Underminer treats the system as a black box and lets the designer indicate the model parameters, inputs, and outputs that are of interest. It differentiates convergent from nonconvergent behaviors using Convergence Classifier Functions (CCFs). The tool can be applied in the context of testing models created late in the controller development stage, where it assumes that the given model displays mostly convergent behavior and learns a CCF in an unsupervised fashion from such convergent model behaviors. This CCF is then used to guide a thorough exploration of the model with the help of optimization-guided techniques or adaptive sampling techniques, with the goal of identifying rare nonconvergent model behaviors. 
Underminer can also be used early in the development stage, where models may have some significant nonconvergent behaviors. Here, the framework permits designers to indicate their mental model for convergence by labeling behaviors as convergent/nonconvergent and then constructs a CCF using a supervised learning technique. In this use case, the goal is to use the CCF to test an improved design for the model. Underminer supports a number of convergence-like notions, such as those based on Lyapunov analysis and temporal logic, and also CCFs learned directly from labeled output behaviors using machine-learning techniques such as support vector machines and neural networks. We demonstrate the efficacy of Underminer by evaluating its performance on several academic as well as industrial examples.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "20", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Fan:2018:SDR, author = "Chuchu Fan and James Kapinski and Xiaoqing Jin and Sayan Mitra", title = "Simulation-Driven Reachability Using Matrix Measures", journal = j-TECS, volume = "17", number = "1", pages = "21:1--21:??", month = jan, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3126685", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Simulation-driven verification can provide formal safety guarantees for otherwise intractable nonlinear and hybrid system models. A key step in simulation-driven algorithms is to compute the reach set overapproximations from a set of initial states through numerical simulations and sensitivity analysis. 
This article addresses this problem by providing algorithms for computing discrepancy functions as the upper bound on the sensitivity, that is, the rate at which trajectories starting from neighboring states converge or diverge. The algorithms rely on computing local bounds on matrix measures as the exponential change rate of the discrepancy function. We present two techniques to compute the matrix measures under different norms: regular Euclidean norm or Euclidean norm under coordinate transformation, such that the exponential rate of the discrepancy function, and therefore, the conservativeness of the overapproximation, is locally minimized. The proposed algorithms enable automatic reach set computations of general nonlinear systems and have been successfully used on several challenging benchmark models. All proposed algorithms for computing discrepancy functions give soundness and relative completeness of the overall simulation-driven safety-bounded verification algorithm. We present a series of experiments to illustrate the accuracy and performance of the algorithms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "21", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kim:2018:PSC, author = "Hyoseung Kim and Ragunathan (Raj) Rajkumar", title = "Predictable Shared Cache Management for Multi-Core Real-Time Virtualization", journal = j-TECS, volume = "17", number = "1", pages = "22:1--22:??", month = jan, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3092946", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "Real-time virtualization has gained much attention for the consolidation of multiple real-time systems onto a single hardware platform while ensuring timing predictability. However, a shared last-level cache (LLC) on modern multi-core platforms can easily hamper the timing predictability of real-time virtualization due to the resulting temporal interference among consolidated workloads. Since such interference caused by the LLC is highly variable and may have not even existed in legacy systems to be consolidated, it poses a significant challenge for real-time virtualization. In this article, we propose a predictable shared cache management framework for multi-core real-time virtualization. Our framework introduces two hypervisor-level techniques, vLLC and vColoring, that enable the cache allocation of individual tasks running in a virtual machine (VM), which is not achievable by the current state of the art. Our framework also provides a cache management scheme that determines cache allocation to tasks, designs VMs in a cache-aware manner, and minimizes the aggregated utilization of VMs to be consolidated. As a proof of concept, we implemented vLLC and vColoring in the KVM hypervisor running on x86 and ARM multi-core platforms. 
Experimental results with three different guest OSs (i.e., Linux/RK, vanilla Linux, and MS Windows Embedded) show that our techniques can effectively control the cache allocation of tasks in VMs. Our cache management scheme yields a significant utilization benefit compared to other approaches while satisfying timing constraints.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "22", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kahkonen:2018:TPC, author = "Kari K{\"a}hk{\"o}nen and Keijo Heljanko", title = "Testing Programs with Contextual Unfoldings", journal = j-TECS, volume = "17", number = "1", pages = "23:1--23:??", month = jan, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/2810000", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this article, we present a new algorithm that combines contextual unfoldings and dynamic symbolic execution to systematically test multithreaded programs. The approach uses symbolic execution to limit the number of input values and unfoldings to thus limit the number of thread interleavings that are needed to cover reachable local states of threads in the program under test. We show that the use of contextual unfoldings allows interleavings of threads to be succinctly represented. This can in some cases lead to a substantial reduction in the number of needed test executions when compared to previous approaches.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "23", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Gu:2018:EST, author = "Xiaozhe Gu and Arvind Easwaran", title = "Efficient Schedulability Test for Dynamic-Priority Scheduling of Mixed-Criticality Real-Time Systems", journal = j-TECS, volume = "17", number = "1", pages = "24:1--24:??", month = jan, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3105922", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Systems in many safety-critical application domains are subject to certification requirements. In such a system, there are typically different applications providing functionalities that have varying degrees of criticality. Consequently, the certification requirements for functionalities at these different criticality levels are also varying, with very high levels of assurance required for a highly critical functionality, whereas relatively low levels of assurance are required for a less critical functionality. Considering the timing assurance given to various applications in the form of guaranteed budgets within deadlines, a theory of real-time scheduling for such multi-criticality systems has been recently under development. In particular, an algorithm called Earliest Deadline First with Virtual Deadlines (EDF-VD) has shown a lot of promise for systems with two criticality levels, especially in terms of practical performance demonstrated through experiment results. In this article, we design a new schedulability test for EDF-VD that extends these performance benefits to multi-criticality systems. We propose a new test based on demand bound functions and also present a novel virtual deadline assignment strategy. 
Through extensive experiments, we show that the proposed technique significantly outperforms existing strategies for a variety of generic real-time systems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "24", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kulkarni:2018:LOC, author = "Amey Kulkarni and Colin Shea and Tahmid Abtahi and Houman Homayoun and Tinoosh Mohsenin", title = "Low Overhead {CS}-Based Heterogeneous Framework for Big Data Acceleration", journal = j-TECS, volume = "17", number = "1", pages = "25:1--25:??", month = jan, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3092944", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Big data processing on hardware gained immense interest among the hardware research community to take advantage of fast processing and reconfigurability. Though the computation latency can be reduced using hardware, big data processing cost is dominated by data transfers. In this article, we propose a low overhead framework based on compressive sensing (CS) to reduce data transfers up to 67\% without affecting signal quality. CS has two important kernels: ``sensing'' and ``reconstruction.'' In this article, we focus on CS reconstruction is using orthogonal matching pursuit (OMP) algorithm. We implement the OMP CS reconstruction algorithm on a domain-specific PENC many-core platform and a low-power Jetson TK1 platform consisting of an ARM CPU and a K1 GPU. Detailed performance analysis of OMP algorithm on each platform suggests that the PENC many-core platform has $ 15 \times $ and $ 18 \times $ less energy consumption and $ 16 \times $ and $ 8 \times $ faster reconstruction time as compared to the low-power ARM CPU and K1 GPU, respectively. 
Furthermore, we implement the proposed CS-based framework on heterogeneous architecture, in which the PENC many-core architecture is used as an ``accelerator'' and processing is performed on the ARM CPU platform. For demonstration, we integrate the proposed CS-based framework with a Hadoop MapReduce platform for a face detection application. The results show that the proposed CS-based framework with the PENC many-core as an accelerator achieves a 26.15\% data storage/transfer reduction, with an execution time and energy consumption overhead of 3.7\% and 0.002\%, respectively, for 5,000 image transfers. Compared to the CS-based framework implementation on the low-power Jetson TK1 ARM CPU+GPU platform, the PENC many-core implementation is $ 2.3 \times $ faster for the image reconstruction part, while achieving 29\% higher performance and 34\% better energy efficiency for the complete face detection application on the Hadoop MapReduce platform.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "25", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Nodeh:2018:EAM, author = "Mohammad Taghi Teimoori Nodeh and Mostafa Bazzaz and Alireza Ejlali", title = "Exploiting Approximate {MLC-PCM} in Low-Power Embedded Systems", journal = j-TECS, volume = "17", number = "1", pages = "26:1--26:??", month = jan, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3105926", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Multi-level cell phase change memory (MLC-PCM), because of its very low leakage power and high density, is promising for embedded systems. Furthermore, for applications with inherent low sensitivity to errors, approximate write operations can be exploited in MLC-PCM to improve endurance and performance. 
However, data that reside in the approximate MLC-PCM for a rather long time without refreshing are prone to soft errors due to resistance drift phenomenon, while even for an application with inherent low sensitivity to errors, a high soft error rate can degrade its Quality of Result (QoR). The architecture-level approaches to decrease the drift effect incur considerable power overhead (about 100\%), which is a prominent issue in embedded systems, and are dependent on the number of logic levels stored in the PCM cell (e.g., most of them are designed for 4LC-PCM). This article, taking a different approach, proposes a drift-aware frequency and voltage management to alleviate the drift-based soft-error rate. To this end, first we characterize the application data based on the degree of being exposed to the drift to identify the drift-prone application data. Then we assign the execution frequency and voltage to different regions of the application considering the drift. This frequency assignment speeds up the application regions wherein the drift-prone data are accessed to shorten the lifetime of the drift-prone data, thereby decreasing the soft error rate. An integer linear programming model implements our proposed Dynamic Voltage Frequency Scaling (DVFS). Also, the proposed approach is independent of the number of levels of PCM cells and can be applied to any MLC-PCM system. To evaluate the approach, the approximate MLC-PCM is simulated using empirical models and is integrated into a full-system simulator as data memory. The experimental results show that, by exploiting the approach, QoR is in the acceptable range, while its power overhead is about 84\% (on average) less than that of the architecture-level approach.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "26", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Gaglio:2018:DPD, author = "Salvatore Gaglio and Giuseppe {Lo Re} and Gloria Martorella and Daniele Peri", title = "{DC4CD}: a Platform for Distributed Computing on Constrained Devices", journal = j-TECS, volume = "17", number = "1", pages = "27:1--27:??", month = jan, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3105923", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this article, we present Distributed Computing for Constrained Devices (DC4CD), a novel software architecture that supports symbolic distributed computing on wireless sensor networks. DC4CD integrates the functionalities of a high-level symbolic interpreter, a compiler, and an operating system, and includes networking abstractions to exchange high-level symbolic code among peer devices. Contrarily to other architectures proposed in the literature, DC4CD allows for changes at runtime, even on deployed nodes of both application and system code. Experimental results show that DC4CD is more efficient in terms of memory usage than existing architectures, with which it also compares well in terms of execution efficiency.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "27", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Namazi:2018:MBR, author = "Alireza Namazi and Meisam Abdollahi and Saeed Safari and Siamak Mohammadi", title = "A Majority-Based Reliability-Aware Task Mapping in High-Performance Homogeneous {NoC} Architectures", journal = j-TECS, volume = "17", number = "1", pages = "28:1--28:??", month = jan, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3131273", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article presents a new reliability-aware task mapping approach in a many-core platform at design time for applications with DAG-based task graphs. The main goal is to devise a task mapping which meets a predefined reliability threshold considering a minimized performance degradation. The proposed approach uses a majority-voting replication technique to fulfill error-masking capability. A quantitative reliability model is also proposed for the platform. Our platform is a homogeneous many-core architecture with mesh-based interconnection using traditional deterministic XY routing algorithm. Our iterative approach is applicable to an unlimited number of system fault types. All parts of the platform, including cores, links, and routers, are assumed to be prone to failures. We used the MNLP optimization technique to find the optimal mapping of the presented task graph. Experimental results show that our suggested task mappings not only comply with predefined reliability thresholds but also achieve notable time complexity reduction with respect to exhaustive space exploration.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "28", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Shukla:2018:EIC, author = "Sandeep K. Shukla", title = "Editorial: {Industry 4.0} --- a Confluence of Embedded Artificial Intelligence, Machine Learning, Robotics and Security", journal = j-TECS, volume = "17", number = "2", pages = "29:1--29:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3194944", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "29", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Leonard:2018:GES, author = "Elizabeth Leonard", title = "Guest Editorial: Special Issue on Formal Methods and Models for System Design", journal = j-TECS, volume = "17", number = "2", pages = "30:1--30:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3162079", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "30", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Tanase:2018:SML, author = "Alexandru Tanase and Michael Witterauf and J{\"u}rgen Teich and Frank Hannig", title = "Symbolic Multi-Level Loop Mapping of Loop Programs for Massively Parallel Processor Arrays", journal = j-TECS, volume = "17", number = "2", pages = "31:1--31:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3092952", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Today's MPSoCs (multiprocessor systems-on-chip) have brought up massively parallel processor array accelerators that may achieve a high computational efficiency by exploiting multiple levels of parallelism and different memory hierarchies. Such parallel processor arrays are perfect targets, particularly for the acceleration of nested loop programs due to their regular and massively parallel nature. However, existing loop parallelization techniques are often unable to exploit multiple levels of parallelism and are either I/O or memory bounded. Furthermore, if the number of available processing elements becomes only known at runtime---as in adaptive systems---static approaches fail. In this article, we solve some of these problems by proposing a hybrid compile/runtime multi-level symbolic parallelization technique that is able to: (a) exploit multiple levels of parallelism as well as (b) different memory hierarchies, and (c) to match the I/O or memory capabilities of the target architecture for scenarios where the number of available processing elements is only known at runtime. Our proposed technique consists of two compile-time transformations: (a) symbolic hierarchical tiling followed by (b) symbolic multi-level scheduling.
The tiling levels scheduled in parallel exploit different levels of parallelism, whereas the sequential one, different memory hierarchies. Furthermore, by tuning the size of the tiles on the individual levels, a tradeoff between the necessary I/O-bandwidth and memory is possible, which facilitates obeying resource constraints. The resulting schedules are symbolic with respect to the problem size and tile sizes. Thus, the number of processing elements to map onto does not need to be known at compile time. At runtime, when the number of available processors becomes known, a simple prologue chooses a feasible schedule with respect to I/O and memory constraints that is latency-optimal for the chosen tile size. In summary, our approach determines the set of feasible, latency-optimal symbolic loop schedule candidates at compile time, from which one is dynamically selected at runtime. This approach exploits multiple levels of parallelism, is independent of the problem size of the loop nest, and thereby avoids any expensive re-compilation at runtime. This is particularly important for low cost and memory-scarce embedded MPSoC platforms that may not afford to host a just-in-time compiler.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "31", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Attie:2018:MPR, author = "Paul C. 
Attie and Kinan Dak {Al Bab} and Mouhammad Sakr", title = "Model and Program Repair via {SAT} Solving", journal = j-TECS, volume = "17", number = "2", pages = "32:1--32:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3147426", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We consider the subtractive model repair problem: given a finite Kripke structure M and a CTL formula $ \eta $, determine if M contains a substructure $ M' $ that satisfies $ \eta $. Thus, M can be ``repaired'' to satisfy $ \eta $ by deleting some transitions and states. We map an instance $ \langle M, \eta \rangle $ of model repair to a Boolean formula repair $ (M, \eta)$ such that $ \langle M, \eta \rangle $ has a solution iff repair $ (M, \eta)$ is satisfiable. Furthermore, a satisfying assignment determines which states and transitions must be removed from $M$ to yield a model $ M' $ of $ \eta $. Thus, we can use any SAT solver to repair Kripke structures. Using a complete SAT solver yields a complete algorithm: it always finds a repair if one exists. We also show that CTL model repair is NP-complete. We extend the basic repair method in three directions: (1) the use of abstraction mappings, that is, repair a structure abstracted from M and then concretize the resulting repair to obtain a repair of M, (2) repair concurrent Kripke structures and concurrent programs: we use the pairwise method of Attie and Emerson to represent and repair the behavior of a concurrent program, as a set of ``concurrent Kripke structures'', with only a quadratic increase in the size of the repair formula, and (3) repair hierarchical Kripke structures: we use a CTL formula to summarize the behavior of each ``box,'' and CTL deduction to relate the box formula with the overall specification.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed.
Comput. Syst.", articleno = "32", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Smyth:2018:SSC, author = "Steven Smyth and Christian Motika and Karsten Rathlev and Reinhard von Hanxleden and Michael Mendler", title = "{SCEst}: Sequentially Constructive {Esterel}", journal = j-TECS, volume = "17", number = "2", pages = "33:1--33:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3063129", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The synchronous language Esterel provides determinate concurrency for reactive systems. Determinacy is ensured by the signal coherence rule, which demands that signals have a stable value throughout one reaction cycle. This is natural for the original application domains of Esterel, such as controller design and hardware development; however, it is unnecessarily restrictive for software development. Sequentially Constructive Esterel (SCEst) overcomes this restriction by allowing values to change instantaneously, as long as determinacy is still guaranteed, adopting the recently proposed Sequentially Constructive model of computation. SCEst is grounded in the minimal Sequentially Constructive Language (SCL), which also provides a novel semantic definition and compilation approach for Esterel.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "33", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Dokhanchi:2018:FRD, author = "Adel Dokhanchi and Bardh Hoxha and Georgios Fainekos", title = "Formal Requirement Debugging for Testing and Verification of Cyber-Physical Systems", journal = j-TECS, volume = "17", number = "2", pages = "34:1--34:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3147451", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "A framework for the elicitation and debugging of formal specifications for Cyber-Physical Systems is presented. The elicitation of specifications is handled through a graphical interface. Two debugging algorithms are presented. The first checks for erroneous or incomplete temporal logic specifications without considering the system. The second can be utilized for the analysis of reactive requirements with respect to system test traces. The specification debugging framework is applied on a number of formal specifications collected through a user study. The user study establishes that requirement errors are common and that the debugging framework can resolve many insidious specification errors.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "34", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Li:2018:FPS, author = "Zheng Li and Shuibing He", title = "Fixed-Priority Scheduling for Two-Phase Mixed-Criticality Systems", journal = j-TECS, volume = "17", number = "2", pages = "35:1--35:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3105921", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this article, a two-phase execution model is proposed for mixed-criticality (MC) tasks. Different from traditional MC tasks with a computation phase only, the two-phase execution model requires a memory-access phase first to fetch the instructions and data, and then computation. Theoretical foundations are first established for a schedulability test under given memory-access and computation priority assignment. Based on the established theoretical conclusions, a two-stage priority assignment algorithm, which can find the best priority assignment for both memory-access and computation phases under fixed-priority scheduling, is further developed. Extensive experiments have been conducted and the experimental results validate the effectiveness of our proposed approach.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "35", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Liang:2018:EVL, author = "Lihao Liang and Tom Melham and Daniel Kroening and Peter Schrammel and Michael Tautschnig", title = "Effective Verification for Low-Level Software with Competing Interrupts", journal = j-TECS, volume = "17", number = "2", pages = "36:1--36:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3147432", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Interrupt-driven software is difficult to test and debug, especially when interrupts can be nested and subject to priorities. Interrupts can arrive at arbitrary times, leading to an exponential blow-up in the number of cases to consider. We present a new formal approach to verifying interrupt-driven software based on symbolic execution. The approach leverages recent advances in the encoding of the execution traces of interacting, concurrent threads. We assess the performance of our method on benchmarks drawn from embedded systems code and device drivers, and experimentally compare it to conventional approaches that use source-to-source transformations. Our results show that our method significantly outperforms these techniques. To the best of our knowledge, our work is the first to demonstrate effective verification of low-level embedded software with nested interrupts.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "36", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Xie:2018:ESA, author = "Xinfeng Xie and Dayou Du and Qian Li and Yun Liang and Wai Teng Tang and Zhong Liang Ong and Mian Lu and Huynh Phung Huynh and Rick Siow Mong Goh", title = "Exploiting Sparsity to Accelerate Fully Connected Layers of {CNN}-Based Applications on Mobile {SoCs}", journal = j-TECS, volume = "17", number = "2", pages = "37:1--37:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3122788", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Convolutional neural networks (CNNs) are widely employed in many image recognition applications. With the proliferation of embedded and mobile devices, such applications are becoming commonplace on mobile devices. Network pruning is a commonly used strategy to reduce the memory and storage footprints of CNNs on mobile devices. In this article, we propose customized versions of the sparse matrix multiplication algorithm to speed up inference on mobile devices and make it more energy efficient. Specifically, we propose a Block Compressed Sparse Column algorithm and a bit-representation-based algorithm (BitsGEMM) that exploit sparsity to accelerate the fully connected layers of a network on the NVIDIA Jetson TK1 platform. We evaluate the proposed algorithms using real-world object classification and object detection applications. Experiments show that performance speedups can be achieved over the original baseline implementation using cuBLAS. 
On object detection CNNs, an average speedup of $ 1.82 \times $ is obtained over baseline cuBLAS in the fully connected layer of the VGG model, whereas on classification CNNs, an average speedup of $ 1.51 \times $ is achieved for the fully connected layer of the pruned-VGG model. Energy consumption reduction of 43--46\% is also observed due to decreased computational and memory bandwidth demands.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "37", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lu:2018:TSI, author = "Sixing Lu and Roman Lysecky", title = "Time and Sequence Integrated Runtime Anomaly Detection for Embedded Systems", journal = j-TECS, volume = "17", number = "2", pages = "38:1--38:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3122785", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Network-connected embedded systems grow on a large scale as a critical part of Internet of Things, and these systems are under the risk of increasing malware. Anomaly-based detection methods can detect malware in embedded systems effectively and provide the advantage of detecting zero-day exploits relative to signature-based detection methods, but existing approaches incur significant performance overheads and are susceptible to mimicry attacks. In this article, we present a formal runtime security model that defines the normal system behavior including execution sequence and execution timing. The anomaly detection method in this article utilizes on-chip hardware to non-intrusively monitor system execution through trace port of the processor and detect malicious activity at runtime. 
We further analyze the properties of the timing distribution for control flow events, and select subset of monitoring targets by three selection metrics to meet hardware constraint. The designed detection method is evaluated by a network-connected pacemaker benchmark prototyped in FPGA and simulated in SystemC, with several mimicry attacks implemented at different levels. The resulting detection rate and false positive rate considering constraints on the number of monitored events supported in the on-chip hardware demonstrate good performance of our approach.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "38", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ballabriga:2018:SWC, author = "Cl{\'e}ment Ballabriga and Julien Forget and Giuseppe Lipari", title = "Symbolic {WCET} Computation", journal = j-TECS, volume = "17", number = "2", pages = "39:1--39:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3147413", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Parametric Worst-case execution time (WCET) analysis of a sequential program produces a formula that represents the worst-case execution time of the program, where parameters of the formula are user-defined parameters of the program (as loop bounds, values of inputs, or internal variables, etc). In this article we propose a novel methodology to compute the parametric WCET of a program. Unlike other algorithms in the literature, our method is not based on Integer Linear Programming (ILP). Instead, we follow an approach based on the notion of symbolic computation of WCET formulae. After explaining our methodology and proving its correctness, we present a set of experiments to compare our method against the state of the art. 
We show that our approach dominates other parametric analyses and produces results that are very close to those produced by non-parametric ILP-based approaches, while keeping very good computing time.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "39", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Dutt:2018:ADA, author = "Sunil Dutt and Sukumar Nandi and Gaurav Trivedi", title = "Analysis and Design of Adders for Approximate Computing", journal = j-TECS, volume = "17", number = "2", pages = "40:1--40:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3131274", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The concept of approximate computing, that is, to sacrifice computation quality for computation efforts, has recently emerged as a promising design approach. Over the past decade, several research works have explored approximate computing at both the software level and hardware level of abstraction with encouraging results. At the hardware level of abstraction, adders (being the fundamental and most widely used data operators in digital systems) have attracted a significant attention for approximation. In this article, we first explain briefly the need/significance of approximate adders. We then propose four Approximate Full Adders (AFAs) for high-performance energy-efficient approximate computing. The key design objective behind the proposed AFAs is to curtail the length of carry propagation subjected to minimal error rate. 
Next, we exploit one of the proposed AFAs (optimal one) to construct an N-bit approximate adder that hereinafter is referred as ``ApproxADD.'' An emergent property of ApproxADD is that carries do not propagate in it, and, consequently, it provides bit-width-aware constant delay (O(1)). ApproxADD also provides improvement in dynamic power consumption by 46.31\% and in area by 28.57\% w.r.t. Ripple Carry Adder (RCA), which exhibits the lowest power and area. Although ApproxADD provides a significant improvement in delay, power, and area, it may not be preferred for some of the error-resilient applications because its: (i) Error Distance (ED) is too high; and (ii) Error Rate (ER) increases rapidly with bit-width ($N$). To improve ED and ER, we exploit the concept of carry-lifetime and Error Detection and Correction logic, respectively. In this way, we introduce two more (improved) versions of ApproxADD, which we call ApproxADD $ \upsilon $ 1 and ApproxADD $ \upsilon $ 2. We compare ApproxADD, ApproxADD $ \upsilon $ 1, and ApproxADD $ \upsilon $ 2 with existing approximate adders based on conventional design metrics and approximate computing design metrics. Furthermore, to inspect effectiveness of the proposed approach in real-life applications, we demonstrate image compression and decompression by replacing the conventional addition operations in Discrete Cosine Transform (DCT) and Inverse Discrete Cosine Transform (IDCT) modules with ApproxADD $ \upsilon $ 2.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "40", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Leech:2018:RPP, author = "Charles Leech and Charan Kumar and Amit Acharyya and Sheng Yang and Geoff V. Merrett and Bashir M. 
Al-Hashimi", title = "Runtime Performance and Power Optimization of Parallel Disparity Estimation on Many-Core Platforms", journal = j-TECS, volume = "17", number = "2", pages = "41:1--41:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3133560", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article investigates the use of many-core systems to execute the disparity estimation algorithm, used in stereo vision applications, as these systems can provide flexibility between performance scaling and power consumption. We present a learning-based runtime management approach that achieves a required performance threshold while minimizing power consumption through dynamic control of frequency and core allocation. Experimental results are obtained from a 61-core Intel Xeon Phi platform for the aforementioned investigation. The same performance can be achieved with an average reduction in power consumption of 27.8\% and increased energy efficiency by 30.04\% when compared to Dynamic Voltage and Frequency Scaling control alone without runtime management.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "41", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lee:2018:FRT, author = "Ganghee Lee and Ediz Cetin and Oliver Diessel", title = "Fault Recovery Time Analysis for Coarse-Grained Reconfigurable Architectures", journal = j-TECS, volume = "17", number = "2", pages = "42:1--42:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3140944", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Coarse-grained reconfigurable architectures (CGRAs) have drawn increasing attention due to their performance and flexibility advantages. Typically, CGRAs incorporate many processing elements in the form of an array, which is suitable for implementing spatial redundancy, as used in the design of fault-tolerant systems. This article introduces a recovery time model for transient faults in CGRAs. The proposed fault-tolerant CGRAs are based on triple modular redundancy and coding techniques for error detection and correction. To evaluate the model, several kernels from space computing are mapped onto the suggested architecture. We demonstrate the tradeoff between recovery time, performance, and area. In addition, the average execution time of an application including recovery time is evaluated using area-based error-rate estimates in harsh radiation environments. The results show that task partitioning is important for bounding the recovery time of applications that have long execution times. It is also shown that error-correcting code (ECC) is of limited practical value for tasks with long execution times in high radiation environments, or when the degree of task partitioning is high.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "42", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Harrison:2018:CPR, author = "David C. Harrison and Winston K. G. Seah and Ramesh Rayudu", title = "Coverage Preservation with Rapid Forwarding in Energy-Harvesting Wireless Sensor Networks for Critical Rare Events", journal = j-TECS, volume = "17", number = "2", pages = "43:1--43:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3140961", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Wireless sensor networks for rarely occurring critical events must maintain sensing coverage and low-latency network connectivity to ensure event detection and subsequent rapid propagation of notification messages. Few algorithms have been proposed that address both coverage and forwarding and those that do are either unconcerned with rapid propagation or are not optimised to handle the constant changes in topology observed in duty-cycled networks. This article proposes an algorithm for Coverage Preservation with Rapid Forwarding (CPRF). The algorithm is shown to deliver perfect coverage maintenance and low-latency guaranteed message propagation whilst allowing stored-charge conservation via collaborative duty cycling in energy-harvesting networks. Favourable comparisons are made against established and recently proposed algorithms in both sparse planned and dense random distributions. Further, an implementation for commercially available wireless sensing devices is evaluated for detection and notification of damage to highway light poles caused by vortex shedding.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "43", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Li:2018:ECB, author = "He Li and Kaoru Ota and Mianxiong Dong", title = "Energy Cooperation in Battery-Free Wireless Communications with Radio Frequency Energy Harvesting", journal = j-TECS, volume = "17", number = "2", pages = "44:1--44:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3141249", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Radio frequency (RF) energy harvesting techniques are becoming a potential method to power battery-free wireless networks. In RF energy harvesting communications, energy cooperation enables shaping and optimization of the energy arrivals at the energy-receiving node to improve the overall system performance. In this article, we propose an energy cooperation scheme that enables energy cooperation in battery-free wireless networks with RF harvesting. We first study the battery-free wireless network with RF energy harvesting and then state the problem that optimizing the system performance with limited harvesting energy through new energy cooperation protocol. Finally, from the extensive simulation results, our energy cooperation protocol performs better than the original battery-free wireless network solution.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "44", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Park:2018:SCG, author = "Jurn-Gyu Park and Chen-Ying Hsieh and Nikil Dutt and Sung-Soo Lim", title = "Synergistic {CPU--GPU} Frequency Capping for Energy-Efficient Mobile Games", journal = j-TECS, volume = "17", number = "2", pages = "45:1--45:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3145337", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Mobile platforms are increasingly using Heterogeneous Multiprocessor Systems-on-Chip (HMPSoCs) with differentiated processing cores and GPUs to achieve high performance for graphics-intensive applications such as mobile games. Traditionally, separate CPU and GPU governors are deployed in order to achieve energy efficiency through Dynamic Voltage Frequency Scaling (DVFS) but miss opportunities for further energy savings through coordinated system-level application of DVFS. We present a cooperative CPU-GPU DVFS strategy (called Co-Cap) that orchestrates energy-efficient CPU and GPU DVFS through synergistic CPU and GPU frequency capping to avoid frequency overprovisioning while maintaining desired performance. Unlike traditional approaches that target a narrow set of mobile games, our Co-Cap approach is applicable across a wide range of microbenchmarks and mobile games. Our methodology employs a systematic training phase using fine-grained refinement steps with evaluations of frequency capping tables followed by a deployment phase, allowing deployment across a wide range of microbenchmarks and mobile games with varying graphics workloads. 
Our experimental results across multiple sets of over 200 microbenchmarks and 40 mobile games show that Co-Cap improves energy per frame by on average 8.9\% (up to 18.3\%) and 7.8\% (up to 27.6\%) (16.6\% and 15.7\% in CPU-dominant applications) and achieves minimal frames-per-second (FPS) loss by 0.9\% and 0.85\% (1.3\% and 1.5\% in CPU-dominant applications) on average in training and deployment sets, respectively, compared to the default CPU and GPU governors, with negligible overhead in execution time and power consumption on the ODROID-XU3 platform.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "45", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Narayan:2018:MTR, author = "Apurva Narayan and Greta Cutulenco and Yogi Joshi and Sebastian Fischmeister", title = "Mining Timed Regular Specifications from System Traces", journal = j-TECS, volume = "17", number = "2", pages = "46:1--46:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3147660", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Temporal properties define the order of occurrence and timing constraints on event occurrence. Such specifications are important for safety-critical real-time systems. We propose a framework for automatically mining temporal properties that are in the form of timed regular expressions (TREs) from system traces. Using an abstract structure of the property, the framework constructs a finite state machine to serve as an acceptor. We analytically derive speedup for the fragment and confirm the speedup using empirical validation with synthetic traces. 
The framework is evaluated on industrial-strength safety-critical real-time applications using traces with more than 1 million entries.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "46", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Shoushtari:2018:SIS, author = "Majid Shoushtari and Bryan Donyanavard and Luis Angel D. Bathen and Nikil Dutt", title = "{ShaVe-ICE}: Sharing Distributed Virtualized {SPMs} in Many-Core Embedded Systems", journal = j-TECS, volume = "17", number = "2", pages = "47:1--47:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3157667", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Traditional approaches for managing software-programmable memories (SPMs) do not support sharing of distributed on-chip memory resources and, consequently, miss the opportunity to better utilize those memory resources. Managing on-chip memory resources in many-core embedded systems with distributed SPMs requires runtime support to share memory resources between various threads with different memory demands running concurrently. Runtime SPM managers cannot rely on prior knowledge about the dynamically changing mix of threads that will execute and therefore should be designed in a way that enables SPM allocations for any unpredictable mix of threads contending for on-chip memory space. This article proposes ShaVe-ICE, an operating-system-level solution, along with hardware support, to virtualize and ultimately share SPM resources across a many-core embedded system to reduce the average memory latency. We present a number of simple allocation policies to improve performance and energy. 
Experimental results show that sharing SPMs could reduce the average execution time of the workload up to 19.5\% and reduce the dynamic energy consumed in the memory subsystem up to 14\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "47", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{He:2018:AFI, author = "Zhijian He and Yao Chen and Zhaoyan Shen", title = "Attitude Fusion of Inertial and Magnetic Sensor under Different Magnetic Filed Distortions", journal = j-TECS, volume = "17", number = "2", pages = "48:1--48:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3157668", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "By virtue of gravity measurement from a handheld inertial measurement unit (IMU) sensor, current indoor attitude estimation algorithms can provide accurate roll/pitch dimension angles. Acquisition of precise heading is limited by the absence of accurate magnetic reference. Consequently, initial stage magnetometer calibration is deployed to alleviate this bottleneck in attitude fusion. However, available algorithms tackle magnetic distortion based on time-invariant surroundings, casting the post-calibration magnetic data into unchanged ellipsoid centered in the calibration place. Consequently, inaccurate fusion results are formulated in a more common case of random walk in time-varying magnetic indoor environment. This article proposes a new fusion algorithm from various kinds of IMU sensors, namely gyroscope, accelerometer, and magnetometer. Compared to state-of-the-art attitude fusion approaches, this article addresses the indoor time-varying magnetic perturbation problem in a geometric view. 
We propose an extended Kalman filter--based algorithm based on this detailed geometric model to eliminate the position-dependent effect of a compass sensor. Experimental data demonstrate that, under different indoor magnetic distortion environments, our proposed attitude fusion algorithm has the maximum angle error of 2.02${}^\circ $, outperforming 7.17${}^\circ $ of a gradient-declining-based algorithm. Additionally, this attitude fusion result is constructed in a low-cost handheld Arduino core--based IMU device, which can be widely applied to embedded systems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "48", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Bhattacharjee:2018:CRM, author = "Sukanta Bhattacharjee and Yi-Ling Chen and Juinn-Dar Huang and Bhargab B. Bhattacharya", title = "Concentration-Resilient Mixture Preparation with Digital Microfluidic Lab-on-Chip", journal = j-TECS, volume = "17", number = "2", pages = "49:1--49:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3157094", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Sample preparation plays a crucial role in almost all biochemical applications, since a predominant portion of biochemical analysis time is associated with sample collection, transportation, and preparation. Many sample-preparation algorithms are proposed in the literature that are suitable for execution on programmable digital microfluidic (DMF) platforms. In most of the existing DMF-based sample-preparation algorithms, a fixed target ratio is provided as input, and the corresponding mixing tree is generated as output. However, in many biochemical applications, target mixtures with exact component proportions may not be needed. 
From a biochemical perspective, it may be sufficient to prepare a mixture in which the input reagents may lie within a range of concentration factors. The choice of a particular valid ratio, however, strongly impacts solution-preparation cost and time. To address this problem, we propose a concentration-resilient ratio-selection method from the input ratio space so that the reactant cost is minimized. We propose an integer linear programming--based method that terminates very fast while producing the optimum solution, considering both uniform and weighted cost of reagents. Experimental results reveal that the proposed method can be used conveniently in tandem with several existing sample-preparation algorithms for improving their performance.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "49", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lin:2018:MCV, author = "Shuoxin Lin and Jiahao Wu and Shuvra S. Bhattacharyya", title = "Memory-Constrained Vectorization and Scheduling of Dataflow Graphs for Hybrid {CPU--GPU} Platforms", journal = j-TECS, volume = "17", number = "2", pages = "50:1--50:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3157669", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The increasing use of heterogeneous embedded systems with multi-core CPUs and Graphics Processing Units (GPUs) presents important challenges in effectively exploiting pipeline, task, and data-level parallelism to meet throughput requirements of digital signal processing applications. 
Moreover, in the presence of system-level memory constraints, hand optimization of code to satisfy these requirements is inefficient and error prone and can, therefore, greatly slow down development time or result in highly underutilized processing resources. In this article, we present vectorization and scheduling methods to effectively exploit multiple forms of parallelism for throughput optimization on hybrid CPU-GPU platforms, while conforming to system-level memory constraints. The methods operate on synchronous dataflow representations, which are widely used in the design of embedded systems for signal and information processing. We show that our novel methods can significantly improve system throughput compared to previous vectorization and scheduling approaches under the same memory constraints. In addition, we present a practical case-study of applying our methods to significantly improve the throughput of an orthogonal frequency division multiplexing receiver system for wireless communications.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "50", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Huang:2018:HPH, author = "Tian Huang and Yongxin Zhu and Yajun Ha and Xu Wang and Meikang Qiu", title = "A Hardware Pipeline with High Energy and Resource Efficiency for {FMM} Acceleration", journal = j-TECS, volume = "17", number = "2", pages = "51:1--51:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3157670", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/bibnet/subjects/fastmultipole.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The fast multipole method (FMM) is a promising mathematical technique that accelerates the calculation of long-ranged forces in the large-sized n-body problem. 
Existing implementations of the FMM on general-purpose processors are energy and resource inefficient. To mitigate these issues, we propose a hardware pipeline that accelerates three key FMM steps. The pipeline improves energy efficiency by exploiting fine-granularity parallelism of the FMM. We reuse the pipeline for different FMM steps to reduce resource usage by 66\%. Compared to the state-of-the-art implementations on CPUs and GPUs, our implementation requires 15\% less energy and delivers 2.61 times more floating-point operations.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "51", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Qian:2018:ECD, author = "Kun Qian and Chenshu Wu and Zheng Yang and Yunhao Liu and Fugui He and Tianzhang Xing", title = "Enabling Contactless Detection of Moving Humans with Dynamic Speeds Using {CSI}", journal = j-TECS, volume = "17", number = "2", pages = "52:1--52:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3157677", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Device-free passive detection is an emerging technology to detect whether there exist any moving entities in the areas of interest without attaching any device to them. It is an essential primitive for a broad range of applications including intrusion detection for safety precautions, patient monitoring in hospitals, child and elder care at home, and so forth. Despite the prevalent signal feature Received Signal Strength (RSS), most robust and reliable solutions resort to a finer-grained channel descriptor at the physical layer, e.g., the Channel State Information (CSI) in the 802.11n standard. 
Among a large body of emerging techniques, however, few of them have explored the full potential of CSI for human detection. Moreover, space diversity supported by nowadays popular multiantenna systems are not investigated to a comparable extent as frequency diversity. In this article, we propose a novel scheme for device-free PAssive Detection of moving humans with dynamic Speed (PADS). Both full information (amplitude and phase) of CSI and space diversity across multiantennas in MIMO systems are exploited to extract and shape sensitive metrics for accuracy and robust target detection. We prototype PADS on commercial WiFi devices, and experiment results in different scenarios demonstrate that PADS achieves great performance improvement in spite of dynamic human movements.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "52", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Guo:2018:CSP, author = "Danlu Guo and Mohamed Hassan and Rodolfo Pellizzoni and Hiren Patel", title = "A Comparative Study of Predictable {DRAM} Controllers", journal = j-TECS, volume = "17", number = "2", pages = "53:1--53:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3158208", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Recently, the research community has introduced several predictable dynamic random-access memory (DRAM) controller designs that provide improved worst-case timing guarantees for real-time embedded systems. The proposed controllers significantly differ in terms of arbitration, configuration, and simulation environment, making it difficult to assess the contribution of each approach. 
To bridge this gap, this article provides the first comprehensive evaluation of state-of-the-art predictable DRAM controllers. We propose a categorization of available controllers, and introduce an analytical performance model based on worst-case latency. We then conduct an extensive evaluation for all state-of-the-art controllers based on a common simulation platform, and discuss findings and recommendations.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "53", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mozaffari-Kermani:2018:ERE, author = "Mehran Mozaffari-Kermani and Reza Azarderakhsh and Ausmita Sarker and Amir Jalali", title = "Efficient and Reliable Error Detection Architectures of Hash-Counter-Hash Tweakable Enciphering Schemes", journal = j-TECS, volume = "17", number = "2", pages = "54:1--54:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3159173", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/hash.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Through pseudorandom permutation, tweakable enciphering schemes (TES) constitute block cipher modes of operation which perform length-preserving computations. The state-of-the-art research has focused on different aspects of TES, including implementations on hardware [field-programmable gate array (FPGA)/ application-specific integrated circuit (ASIC)] and software (hard/soft-core microcontrollers) platforms, algorithmic security, and applicability to sensitive, security-constrained usage models. In this article, we propose efficient approaches for protecting such schemes against natural and malicious faults. 
Specifically, noting that intelligent attackers do not merely get confined to injecting multiple faults, one major benchmark for the proposed schemes is evaluation toward biased and burst fault models. We evaluate a variant of TES, i.e., the Hash-Counter-Hash scheme, which involves polynomial hashing as other variants are either similar or do not constitute finite field multiplication which, by far, is the most involved operation in TES. In addition, we benchmark the overhead and performance degradation on the ASIC platform. The results of our error injection simulations and ASIC implementations show the suitability of the proposed approaches for a wide range of applications including deeply embedded systems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "54", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Real:2018:ADS, author = "Maria M{\'e}ndez Real and Philipp Wehner and Vianney Lapotre and Diana G{\"o}hringer and Guy Gogniat", title = "Application Deployment Strategies for Spatial Isolation on Many-Core Accelerators", journal = j-TECS, volume = "17", number = "2", pages = "55:1--55:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3168383", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Current cache Side-Channel Attacks (SCAs) countermeasures have not been designed for many-core architectures and need to be revisited in order to be practical for these new technologies. Spatial isolation of resources for sensitive applications has been proposed taking advantage of the large number of resources offered by these architectures. This solution avoids cache sharing with sensitive processes. Consequently, their cache activity cannot be monitored and cache SCAs cannot be performed. 
This work focuses on the implementation of this technique in order to minimize the induced performance overhead. Different strategies for the management of isolated secure zones are implemented and compared.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "55", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Sui:2018:LOP, author = "Yulei Sui and Xiaokang Fan and Hao Zhou and Jingling Xue", title = "Loop-Oriented Pointer Analysis for Automatic {SIMD} Vectorization", journal = j-TECS, volume = "17", number = "2", pages = "56:1--56:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3168364", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Compiler-based vectorization represents a promising solution to automatically generate code that makes efficient use of modern CPUs with SIMD extensions. Two main auto-vectorization techniques, superword-level parallelism vectorization (SLP) and loop-level vectorization (LLV), require precise dependence analysis on arrays and structs to vectorize isomorphic scalar instructions (in the case of SLP) and reduce dynamic dependence checks at runtime (in the case of LLV). The alias analyses used in modern vectorizing compilers are either intra-procedural (without tracking inter-procedural data-flows) or inter-procedural (by using field-sensitive models, which are too imprecise in handling arrays and structs). This article proposes an inter-procedural Loop-oriented Pointer Analysis for C, called Lpa, for analyzing arrays and structs to support aggressive SLP and LLV optimizations effectively. 
Unlike field-insensitive solutions that pre-allocate objects for each memory allocation site, our approach uses a lazy memory model to generate access-based location sets based on how structs and arrays are accessed. Lpa can precisely analyze arrays and nested aggregate structures to enable SIMD optimizations for large programs. By separating the location set generation as an independent concern from the rest of the pointer analysis, Lpa is designed so that existing points-to resolution algorithms (e.g., flow-insensitive and flow-sensitive pointer analysis) can be reused easily. We have implemented Lpa fully in the LLVM compiler infrastructure (version 3.8.0). We evaluate Lpa by considering SLP and LLV, the two classic vectorization techniques, on a set of 20 C and Fortran CPU2000/2006 benchmarks. For SLP, Lpa outperforms LLVM's BasicAA and ScevAA by discovering 139 and 273 more vectorizable basic blocks, respectively, resulting in the best speedup of 2.95\% for 173.applu. For LLV, LLVM introduces totally 551 and 652 static bound checks under BasicAA and ScevAA, respectively. In contrast, Lpa has reduced these static checks to 220, with an average of 15.7 checks per benchmark, resulting in the best speedup of 7.23\% for 177.mesa.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "56", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Li:2018:TES, author = "Feng Li and Yanbing Yang and Zicheng Chi and Liya Zhao and Yaowen Yang and Jun Luo", title = "{Trinity}: Enabling Self-Sustaining {WSNs} Indoors with Energy-Free Sensing and Networking", journal = j-TECS, volume = "17", number = "2", pages = "57:1--57:??", month = apr, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3173039", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:34 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Whereas a lot of efforts have been put on energy conservation in wireless sensor networks (WSNs), the limited lifetime of these systems still hampers their practical deployments. This situation is further exacerbated indoors, as conventional energy harvesting (e.g., solar) may not always work. To enable long-lived indoor sensing, we report in this article a self-sustaining sensing system that draws energy from indoor environments, adapts its duty-cycle to the harvested energy, and pays back the environment by enhancing the awareness of the indoor microclimate through an ``energy-free'' sensing. First of all, given the pervasive operation of heating, ventilation, and air conditioning (HVAC) systems indoors, our system harvests energy from airflow introduced by the HVAC systems to power each sensor node. Secondly, as the harvested power is tiny, an extremely low but synchronous duty-cycle has to be applied whereas the system gets no energy surplus to support existing synchronization schemes. So, we design two complementary synchronization schemes that cost virtually no energy. Finally, we exploit the feature of our harvester to sense the airflow speed in an energy-free manner. 
To our knowledge, this is the first indoor wireless sensing system that encapsulates energy harvesting, network operating, and sensing all together.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "57", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Shukla:2018:EUE, author = "Sandeep K. Shukla", title = "Editorial: To Use or Not To? {Embedded} Systems for Voting", journal = j-TECS, volume = "17", number = "3", pages = "58:1--58:??", month = jun, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3206342", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:35 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "58", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Morse:2018:LAW, author = "Jeremy Morse and Steve Kerrison and Kerstin Eder", title = "On the Limitations of Analyzing Worst-Case Dynamic Energy of Processing", journal = j-TECS, volume = "17", number = "3", pages = "59:1--59:??", month = jun, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3173042", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:35 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article examines dynamic energy consumption caused by data during software execution on deeply embedded microprocessors, which can be significant on some devices. In worst-case energy consumption analysis, energy models are used to find the most costly execution path. Taking each instruction's worst-case energy produces a safe but overly pessimistic upper bound. Algorithms for safe and tight bounds would be desirable. 
We show that finding exact worst-case energy is NP-hard, and that tight bounds cannot be approximated with guaranteed safety. We conclude that any energy model targeting tightness must either sacrifice safety or accept overapproximation proportional to data-dependent energy.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "59", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Seo:2018:CIA, author = "Hwajeong Seo and Ilwoong Jeong and Jungkeun Lee and Woo-Hwan Kim", title = "Compact Implementations of {ARX}-Based Block Ciphers on {IoT} Processors", journal = j-TECS, volume = "17", number = "3", pages = "60:1--60:??", month = jun, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3173455", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:35 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this article, we present implementations for Addition, Rotation, and eXclusive-or (ARX)-based block ciphers, including LEA and HIGHT, on IoT devices, including 8-bit AVR, 16-bit MSP, 32-bit ARM, and 32-bit ARM-NEON processors. We optimized 32-/8-bitwise ARX operations for LEA and HIGHT block ciphers by considering variations in word size, the number of general purpose registers, and the instruction set of the target IoT devices. Finally, we achieved the most compact implementations of LEA and HIGHT block ciphers. The implementations were fairly evaluated through the Fair Evaluation of Lightweight Cryptographic Systems framework, and implementations won the competitions in the first and the second rounds.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "60", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hong:2018:ISP, author = "Ding-Yong Hong and Yu-Ping Liu and Sheng-Yu Fu and Jan-Jan Wu and Wei-Chung Hsu", title = "Improving {SIMD} Parallelism via Dynamic Binary Translation", journal = j-TECS, volume = "17", number = "3", pages = "61:1--61:??", month = jun, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3173456", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:35 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Recent trends in SIMD architecture have tended toward longer vector lengths, and more enhanced SIMD features have been introduced in newer vector instruction sets. However, legacy or proprietary applications compiled with short-SIMD ISA cannot benefit from the long-SIMD architecture that supports improved parallelism and enhanced vector primitives, resulting in only a small fraction of potential peak performance. This article presents a dynamic binary translation technique that enables short-SIMD binaries to exploit benefits of new SIMD architectures by rewriting short-SIMD loop code. We propose a general approach that translates loops consisting of short-SIMD instructions to machine-independent IR, conducts SIMD loop transformation/optimization at this IR level, and finally translates to long-SIMD instructions. Two solutions are presented to enforce SIMD load/store alignment, one for the problem caused by the binary translator's internal translation condition and one general approach using dynamic loop peeling optimization. Benchmark results show that average speedups of $ 1.51 \times $ and $ 2.48 \times $ are achieved for an ARM NEON to x86 AVX2 and x86 AVX-512 loop transformation, respectively.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "61", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhang:2018:PEP, author = "Jiutian Zhang and Yuhang Liu and Haifeng Li and Xiaojing Zhu and Mingyu Chen", title = "{PTAT}: an Efficient and Precise Tool for Tracing and Profiling Detailed {TLB} Misses", journal = j-TECS, volume = "17", number = "3", pages = "62:1--62:??", month = jun, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3182174", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:35 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "As the memory access footprints of applications in areas like data analytics increase, the latency overhead of translation lookaside buffer (TLB) misses increases. Thus, the efficiency of TLB becomes increasingly critical for overall system performance. Analyzing TLB miss traces is useful for hardware architecture design and software application optimization. Utilizing cycle-accurate simulators or instrumentation tools is very time-consuming and/or inaccurate for tracing and profiling TLB misses. In this article, we propose an efficient and precise tool to collect and profile last-level TLB misses. This tool utilizes a novel software method called Page Table Access Tracing (PTAT), storing last-level page table entries of certain workload processes into a reserved uncached memory region. Therefore, each last-level TLB miss incurred by user process corresponds to one uncached page table access to main memory, which can be captured and recorded by a hardware memory bus monitor. The detected information is then dumped into offline storage. In this manner, full TLB miss traces are collected and can be analyzed flexibly. Compared to previous software-based methods, this method achieves higher performance. 
Experiments show that, compared with a state-of-the-art kernel instrumentation method (BadgerTrap), which lacks complete dumping trace function, the speedup is still up to 3.88-fold for memory-intensive benchmarks. Due to the improved efficiency and completeness of tracing, case studies validate that more flexible profiling can be conducted, which is of great significance for TLB performance optimization. The accuracy of PTAT is verified by both dedicated sequence and performance counters.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "62", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hosseinabady:2018:DEM, author = "Mohammad Hosseinabady and Jose Luis Nunez-Yanez", title = "Dynamic Energy Management of {FPGA} Accelerators in Embedded Systems", journal = j-TECS, volume = "17", number = "3", pages = "63:1--63:??", month = jun, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3182172", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:35 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this article, we investigate how to utilise a Field-Programmable Gate Array (FPGA) in an embedded system to save energy. For this purpose, we study the energy efficiency of a hybrid FPGA-CPU device that can switch task execution between hardware and software with a focus on periodic tasks. To increase the applicability of this task switching, we also consider the voltage and frequency scaling (VFS) applied to the FPGA to reduce the system energy consumption. We show that in some cases, if the task's period is higher than a specific level, the FPGA accelerator cannot reduce the energy consumption associated to the task and the software version is the most energy efficient option. 
We have applied the proposed techniques to a robot map creation algorithm as a case study which shows up to 38\% energy reduction compared to the FPGA implementation. Overall, experimental results show up to 48\% energy reduction by applying the proposed techniques at runtime on 13 individual tasks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "63", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kim:2018:OND, author = "Hyeonggyu Kim and Minho Ju and Soontae Kim", title = "{OnNetwork+}: Network Delay-Aware Management for Mobile Systems", journal = j-TECS, volume = "17", number = "3", pages = "64:1--64:??", month = jun, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3182171", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:35 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Network errors such as packet losses consume large amounts of energy. We analyzed the reason for this through measurements using the latest smartphones and full-system simulation. We found that on packet losses the smartphones maintain high frequencies for CPU without doing useful work. To address this problem, we propose a method for reducing the energy consumption by lowering the performance level by exploiting a dynamic voltage and frequency scaling mechanism when long network delays are expected. According to our experiments, our method reduces the total energy consumption of web browsing on two different smartphones by up to 10.0\% and 11.5\%, respectively.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "64", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Tsoutsouras:2018:HDR, author = "Vasileios Tsoutsouras and Iraklis Anagnostopoulos and Dimosthenis Masouros and Dimitrios Soudris", title = "A Hierarchical Distributed Runtime Resource Management Scheme for {NoC}-Based Many-Cores", journal = j-TECS, volume = "17", number = "3", pages = "65:1--65:??", month = jun, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3182173", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:35 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "As technology constantly strengthens its presence in all aspects of human life, computing systems integrate a high number of processing cores, whereas applications become more complex and greedy for computational resources. Inevitably, this high increase in processing elements combined with the unpredictable resource requirements of executed applications at design time impose new design constraints to resource management of many-core systems, turning the distributed functionality into a necessity. In this work, we present a distributed runtime resource management framework for many-core systems utilizing a network-on-chip (NoC) infrastructure. Specifically, we couple the concept of distributed management with parallel applications by assigning different roles to the available computing resources. The presented design is based on the idea of local controllers and managers, whereas an on-chip intercommunication scheme ensures decision distribution. The evaluation of the proposed framework was performed on an Intel Single-Chip Cloud Computer, an actual NoC-based, many-core system. 
Experimental results show that the proposed scheme manages to allocate resources efficiently at runtime, leading to gains of up to 30\% in application execution latency compared to relevant state-of-the-art distributed resource management frameworks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "65", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Seo:2018:CSI, author = "Hwajeong Seo", title = "Compact Software Implementation of Public-Key Cryptography on {MSP430X}", journal = j-TECS, volume = "17", number = "3", pages = "66:1--66:??", month = jun, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3190855", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:35 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "On the low-end embedded processors, the implementations of Elliptic Curve Cryptography (ECC) are considered to be a challenging task due to the limited computation power and storage of the low-end embedded processors. Particularly, the multi-precision multiplication and squaring operations are the most expensive operations for ECC implementations. In order to enhance the performance, many works presented efficient multiplication and squaring routines on the target devices. Recent works show that 128-bit security level ECC is available within a second and this is practically fast enough for IoT services. However, previous approaches missed the other important storage issues (i.e., program size, ROM). Considering that the embedded processors only have a few KB ROM, we need to pay attention to the compact ROM size with reasonable performance. In this article, we present very compact and generic implementations of multiplication and squaring operations on the 16-bit MSP430X processors for the ECC. 
The implementations utilize the new 32-bit multiplier and advanced multiplication and squaring routines. Since the proposed routines are generic, the arbitrary length of operand is available with high-speed and small code size. With proposed multiplication and squaring routines, we implemented Curve25519 on the MSP430X processors. The scalar multiplication is performed within 6,666,895 clock cycles and 4,054 bytes. Compared with previous works based on the speed-optimized version, our memory-efficient version reduces the code size by 59.8\%, sacrificing the execution timing by 20.5\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "66", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Yassin:2018:AAC, author = "Yahya H. Yassin and Francky Catthoor and Fabian Kloosterman and Jyh-Jang Sun and Jo{\~a}o Couto and Per Gunnar Kjeldsberg and Nick {Van Helleputte}", title = "Algorithm\slash Architecture Co-optimisation Technique for Automatic Data Reduction of Wireless Read-Out in High-Density Electrode Arrays", journal = j-TECS, volume = "17", number = "3", pages = "67:1--67:??", month = jun, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3190854", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:35 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "High-density electrode arrays used to read out neural activity will soon surpass the limits of the amount of data that can be transferred within reasonable energy budgets. This is true for wired brain implants when the required bandwidth becomes very high, and even more so for untethered brain implants that require wireless transmission of data. We propose an energy-efficient spike data extraction solution for high-density electrode arrays, capable of reducing the data to be transferred by over 85\%. 
We combine temporal and spatial spike data analysis with low implementation complexity, where amplitude thresholds are used to detect spikes and the spatial location of the electrodes is used to extract potentially useful sub-threshold data on neighboring electrodes. We tested our method against a state-of-the-art spike detection algorithm, with prohibitively high implementation complexity, and found that the majority of spikes are extracted reliably. We obtain further improved quality results when ignoring very small spikes below 30\% of the voltage thresholds, resulting in 91\% accuracy. Our approach uses digital logic and is therefore scalable with an increasing number of electrodes.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "67", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hammari:2018:RPD, author = "Elena Hammari and Per Gunnar Kjeldsberg and Francky Catthoor", title = "Runtime Precomputation of Data-Dependent Parameters in Embedded Systems", journal = j-TECS, volume = "17", number = "3", pages = "68:1--68:??", month = jun, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3191311", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:35 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In many modern embedded systems, the available resources (e.g., CPU clock cycles, memory, and energy) are consumed nonuniformly while the system is under exploitation. Typically, the resource requirements in the system change with different input data that the system process. These data trigger different parts of the embedded software, resulting in different operations executed that require different hardware platform resources to be used. 
A significant research effort has been dedicated to develop mechanisms for runtime resource management (e.g., branch prediction for pipelined processors, prefetching of data from main memory to cache, and scenario-based design methodologies). All these techniques rely on the availability of information at runtime about upcoming changes in resource requirements. In this article, we propose a method for detecting upcoming resource changes based on preliminary calculation of software variables that have the most dynamic impact on resource requirements in the system. We apply the method on a modified real-life biomedical algorithm with real input data and estimate a 40\% energy reduction as compared to static DVFS scheduling. Comparing to dynamic DVFS scheduling, an 18\% energy reduction is demonstrated.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "68", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Yoon:2018:SAF, author = "Su-Kyung Yoon and Jitae Yun and Jung-Geun Kim and Shin-Dug Kim", title = "Self-Adaptive Filtering Algorithm with {PCM}-Based Memory Storage System", journal = j-TECS, volume = "17", number = "3", pages = "69:1--69:??", month = jun, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3190856", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:35 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article proposes a new phase change memory- (PCM) based memory storage architecture with associated self-adaptive data filtering for various embedded devices to support energy efficiency as well as high computing power. 
In this approach, PCM-based memory storage can be used as working memory and mass storage layers simultaneously, and a self-adaptive data filtering module composed of small DRAM dual buffers was designed to improve unfavorable PCM features, such as asymmetric read/write access latencies and limited endurance and enhance spatial/temporal localities. In particular, the self-adaptive data filtering algorithm enhances data reusability by screening potentially high reusable data and predicting adequate lifetime of those data depending on current victim time decision value. We also propose the possibility that a small amount of DRAM buffer is embedded into mobile processors, keeping this as small as possible for cost effectiveness and energy efficiency. Experimental results show that by exploiting a small amount of DRAM space for dual buffers and using the self-adaptive filtering algorithm to manage them, the proposed system can reduce execution time by a factor of 1.9 compared to the unified conventional model with the same DRAM capacity and can be considered comparable to 1.5$ \times $ DRAM capacity.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "69", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Amanollahi:2018:ERD, author = "Saba Amanollahi and Ghassem Jaberipur", title = "Extended Redundant-Digit Instruction Set for Energy-Efficient Processors", journal = j-TECS, volume = "17", number = "3", pages = "70:1--70:??", month = jun, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3202664", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:35 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The impact of extending the instruction set architecture (ISA) of a conventional binary processor by a set of redundant-digit arithmetic instructions is studied. Selected binary arithmetic instructions within a given code sequence are replaced with appropriate redundant-digit ones. The selection criteria is so enforced to lead to overall reduction of execution energy and energy-delay product (EDP). A special branch and bound algorithm is devised to modify the dataflow graph (DFG) to a new one that takes advantage of the extended redundant-digit instruction set. The DFG is obtained, via an in-house tool, from the intermediate code representation that is normally produced by the utilized compiler. The required redundant-digit arithmetic operations (including a multiplier, a multiply accumulator, and three- to four-operand redundant-digit adders specially designed for this work) have been synthesized on 45nm NanGate technology by a Synopsys Design Compiler. To evaluate the impact of the proposed ISA augmentation on actual code execution, the simulation and evaluation platform of our choice is an MIPS processor whose ISA is extended by the proposed redundant-digit instructions. 
Several digital signal processing benchmarks are utilized as the source of the baseline MIPS codes, which are converted (via the aforementioned algorithm) to the equivalent mixed binary/redundant-digit codes. Our experiments, as such, show up to 26\% energy and 44\% EDP savings.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "70", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Andersson:2018:SAT, author = "Bj{\"o}rn Andersson and Hyoseung Kim and Dionisio {De Niz} and Mark Klein and Ragunathan (Raj) Rajkumar and John Lehoczky", title = "Schedulability Analysis of Tasks with Corunner-Dependent Execution Times", journal = j-TECS, volume = "17", number = "3", pages = "71:1--71:??", month = jun, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3203407", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:35 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Consider fixed-priority preemptive partitioned scheduling of constrained-deadline sporadic tasks on a multiprocessor. A task generates a sequence of jobs and each job has a deadline that must be met. Assume tasks have Corunner-dependent execution times; i.e., the execution time of a job J depends on the set of jobs that happen to execute (on other processors) at instants when J executes. We present a model that describes Corunner-dependent execution times. For this model, we show that exact schedulability testing is co-NP-hard in the strong sense. Facing this complexity, we present a sufficient schedulability test, which has pseudo-polynomial-time complexity if the number of processors is fixed. 
We ran experiments with synthetic software benchmarks on a quad-core Intel multicore processor with the Linux/RK operating system and found that for each task, its maximum measured response time was bounded by the upper bound computed by our theory.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "71", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Vasilios:2018:CSC, author = "Vasilios Kelefouras and Georgios Keramidas and Nikolaos Voros", title = "Combining Software Cache Partitioning and Loop Tiling for Effective Shared Cache Management", journal = j-TECS, volume = "17", number = "3", pages = "72:1--72:??", month = jun, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3202663", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:35 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "One of the biggest challenges in multicore platforms is shared cache management, especially for data-dominant applications. Two commonly used approaches for increasing shared cache utilization are cache partitioning and loop tiling. However, state-of-the-art compilers lack efficient cache partitioning and loop tiling methods for two reasons. First, cache partitioning and loop tiling are strongly coupled together, and thus addressing them separately is simply not effective. Second, cache partitioning and loop tiling must be tailored to the target shared cache architecture details and the memory characteristics of the corunning workloads. To the best of our knowledge, this is the first time that a methodology provides (1) a theoretical foundation in the above-mentioned cache management mechanisms and (2) a unified framework to orchestrate these two mechanisms in tandem (not separately). 
Our approach manages to lower the number of main memory accesses by an order of magnitude keeping at the same time the number of arithmetic/addressing instructions to a minimal level. We motivate this work by showcasing that cache partitioning, loop tiling, data array layouts, shared cache architecture details (i.e., cache size and associativity), and the memory reuse patterns of the executing tasks must be addressed together as one problem, when a (near)-optimal solution is requested. To this end, we present a search space exploration analysis where our proposal is able to offer a vast reduction in the required search space.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "72", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Shukla:2018:EEC, author = "Sandeep K. Shukla", title = "Editorial: Early Career Researchers in Embedded Computing", journal = j-TECS, volume = "17", number = "4", pages = "73:1--73:??", month = aug, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3241724", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:41 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "73", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Vatanparvar:2018:DAB, author = "Korosh Vatanparvar and Mohammad Abdullah {Al Faruque}", title = "Design and Analysis of Battery-Aware Automotive Climate Control for Electric Vehicles", journal = j-TECS, volume = "17", number = "4", pages = "74:1--74:??", month = aug, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3203408", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:41 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Electric Vehicles (EV) as a zero-emission means of transportation encounter challenges in battery design that cause range anxiety for the drivers. Besides the electric motor, the Heating, Ventilation, and Air Conditioning (HVAC) system is another major contributor to the power consumption that may influence the EV battery lifetime and driving range. In the state-of-the-art methodologies for battery management systems, the battery performance is monitored and improved. While in the automotive climate control, the passenger's thermal comfort is the main objective. Hence, the influence of the HVAC power on the battery behavior for the purpose of jointly optimized battery management and climate control has not been considered. In this article, we propose an automotive climate control methodology that is aware of the battery behavior and performance, while maintaining the passenger's thermal comfort. In our methodology, battery parameters and cabin temperature are modeled and estimated, and the HVAC utilization is optimized and adjusted with respect to the electric motor and HVAC power requests. Therefore, the battery stress reduces, while the cabin temperature is maintained by predicting and optimizing the system states in the near-future. 
We have implemented our methodology and compared its performance to the state-of-the-art in terms of battery lifetime improvement and energy consumption reduction. We have also conducted experiments and analyses to explore multiple control window sizes, drive profiles, ambient temperatures, and modeling error rates in the methodology. It is shown that our battery-aware climate control can extend the battery lifetime by up to 13.2\% and reduce the energy consumption by up to 14.4\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "74", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Pan:2018:MAC, author = "Wen Pan and Tao Xie", title = "A Mirroring-Assisted Channel-{RAID5} {SSD} for Mobile Applications", journal = j-TECS, volume = "17", number = "4", pages = "75:1--75:??", month = aug, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3209625", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:41 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Simply applying an existing redundant array of independent disks (RAID) technique to enhance data reliability within a single solid-state drive for safety-critical mobile applications significantly degrades performance. In this article, we first propose a new RAID5 architecture called channel-RAID5 with mirroring (CR5M) to alleviate the performance degradation problem. Next, an associated data reconstruction strategy called mirroring-assisted channel-level reconstruction (MCR) is developed to further shrink the window of vulnerability. Experimental results demonstrate that compared with channel-RAID5 (CR5), CR5M improves performance up to 40.2\%. 
Compared with disk-oriented reconstruction, a traditional data reconstruction scheme, MCR on average improves data recovery speed by 7.5\% while delivering a similar performance during reconstruction.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "75", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Omar:2018:DRH, author = "Hamza Omar and Qingchuan Shi and Masab Ahmad and Halit Dogan and Omer Khan", title = "Declarative Resilience: a Holistic Soft-Error Resilient Multicore Architecture that Trades off Program Accuracy for Efficiency", journal = j-TECS, volume = "17", number = "4", pages = "76:1--76:??", month = aug, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3210559", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:41 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "To protect multicores from soft-error perturbations, research has explored various resiliency schemes that provide high soft-error coverage. However, these schemes incur high performance and energy overheads. We observe that not all soft-error perturbations affect program correctness, and some soft-errors only affect program accuracy, i.e., the program completes with certain acceptable deviations from error free outcome. Thus, it is practical to improve processor efficiency by trading off resiliency overheads with program accuracy. This article proposes the idea of declarative resilience that selectively applies strong resiliency schemes for code regions that are crucial for program correctness (crucial code) and lightweight resiliency for code regions that are susceptible to program accuracy deviations as a result of soft-errors (non-crucial code). At the application level, crucial and non-crucial code is identified based on its impact on the program outcome. 
A cross-layer architecture enables efficient resilience along with holistic soft-error coverage. Only program accuracy is compromised in the worst-case scenario of a soft-error strike during non-crucial code execution. For a set of machine-learning and graph analytic benchmarks, declarative resilience reduces performance overhead over a state-of-the-art system that applies strong resiliency for all program code regions from $ \approx 1.43 \times $ to $ \approx 1.2 \times $.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "76", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wang:2018:SLL, author = "Guan Wang and Chuanqi Zang and Lei Ju and Mengying Zhao and Xiaojun Cai and Zhiping Jia", title = "Shared Last-Level Cache Management and Memory Scheduling for {GPGPUs} with Hybrid Main Memory", journal = j-TECS, volume = "17", number = "4", pages = "77:1--77:??", month = aug, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3230643", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:41 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Memory intensive workloads become increasingly popular on general purpose graphics processing units (GPGPUs), and impose great challenges on the GPGPU memory subsystem design. On the other hand, with the recent development of non-volatile memory (NVM) technologies, hybrid memory combining both DRAM and NVM achieves high performance, low power, and high density simultaneously, which provides a promising main memory design for GPGPUs. In this article, we explore the shared last-level cache management for GPGPUs with consideration of the underlying hybrid main memory. 
To improve the overall memory subsystem performance, we exploit the characteristics of both the asymmetric read/write latency of the hybrid main memory architecture, as well as the memory coalescing feature of GPGPUs. In particular, to reduce the average cost of L2 cache misses, we prioritize cache blocks from DRAM or NVM based on observations that operations to NVM part of main memory have a large impact on the system performance. Furthermore, the cache management scheme also integrates the GPU memory coalescing and cache bypassing techniques to improve the overall system performance. To minimize the impact of memory divergence behaviors among simultaneously executed groups of threads, we propose a hybrid main memory and warp aware memory scheduling mechanism for GPGPUs. Experimental results show that in the context of a hybrid main memory system, our proposed L2 cache management policy and memory scheduling mechanism improve performance by 15.69\% on average for memory intensive benchmarks, whereas the maximum gain can be up to 29\% and achieve an average memory subsystem energy reduction of 21.27\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "77", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Liang:2018:DFM, author = "Xiaoxuan Liang and Zhangqin Huang and Shengqi Yang and Lanxin Qiu", title = "Device-Free Motion \& Trajectory Detection via {RFID}", journal = j-TECS, volume = "17", number = "4", pages = "78:1--78:??", month = aug, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3230644", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:41 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Compared with traditional methods that employ inertial sensors or wireless sensors, device-free approaches do not require that people carry devices, and they are considered a useful technique for indoor navigation and posture recognition. However, few existing methods can detect the trajectory and movements of humans at the same time. In this study, we propose a scheme called PADAR for addressing these two problems simultaneously by using passive radio frequency identification (RFID) tags but without attaching them to the human body. The idea is based on the principle of radio tomographic imaging, where the variance in a tag's backscattered radio frequency signal strength is influenced by human movement. We integrated a commodity off-the-shelf RFID reader with a two-dimensional phased array antenna and a matrix of passive tags to evaluate the performance of our scheme. We conducted experiments in a simulated indoor environment. The experimental results showed that PADAR achieved an accuracy of over 70\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "78", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ji:2018:ACP, author = "Kecheng Ji and Ming Ling and Longxing Shi and Jianping Pan", title = "An Analytical Cache Performance Evaluation Framework for Embedded Out-of-Order Processors Using Software Characteristics", journal = j-TECS, volume = "17", number = "4", pages = "79:1--79:??", month = aug, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3233182", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:41 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Utilizing analytical models to evaluate proposals or provide guidance in high-level architecture decisions has been becoming more and more attractive. A certain number of methods have emerged regarding cache behaviors and quantified insights in the last decade, such as the stack distance theory and the memory level parallelism (MLP) estimations. However, prior research normally oversimplified the factors that need to be considered in out-of-order processors, such as the effects triggered by reordered memory instructions, and multiple dependences among memory instructions, along with the merged accesses in the same MSHR entry. These ignored influences actually result in low and unstable precisions of recent analytical models. By quantifying the aforementioned effects, this article proposes a cache performance evaluation framework equipped with three analytical models, which can more accurately predict cache misses, MLPs, and the average cache miss service time, respectively. Similar to prior studies, these analytical models are all fed with profiled software characteristics in which case the architecture evaluation process can be accelerated significantly when compared with cycle-accurate simulations. 
We evaluate the accuracy of proposed models compared with gem5 cycle-accurate simulations with 16 benchmarks chosen from Mobybench Suite 2.0, Mibench 1.0, and Mediabench II. The average root mean square errors for predicting cache misses, MLPs, and the average cache miss service time are around 4\%, 5\%, and 8\%, respectively. Meanwhile, the average error of predicting the stall time due to cache misses by our framework is as low as 8\%. The whole cache performance estimation can be sped by about 15 times versus gem5 cycle-accurate simulations and 4 times when compared with recent studies. Furthermore, we have shown and studied the insights between different performance metrics and the reorder buffer sizes by using our models. As an application case of the framework, we also demonstrate how to use our framework combined with McPAT to find out Pareto optimal configurations for cache design space explorations.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "79", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ara:2018:SAM, author = "Hadi Alizadeh Ara and Amir Behrouzian and Martijn Hendriks and Marc Geilen and Dip Goswami and Twan Basten", title = "Scalable Analysis for Multi-Scale Dataflow Models", journal = j-TECS, volume = "17", number = "4", pages = "80:1--80:??", month = aug, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3233183", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:41 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Multi-scale dataflow models have actors acting at multiple granularity levels, e.g., a dataflow model of a video processing application with operations on frame, line, and pixel level. 
The state of the art timing analysis methods for both static and dynamic dataflow types aggregate the behaviours across all granularity levels into one, often large iteration, which is repeated without exploiting the structure within such an iteration. This poses scalability issues to dataflow analysis, because behaviour of the large iteration is analysed by some form of simulation that involves a large number of actor firings. We take a fresh perspective of what is happening inside the large iteration. We take advantage of the fact that the iteration is a sequence of smaller behaviours, each captured in a scenario, that are typically repeated many times. We use the (max,+) linear model of dataflow to represent each of the scenarios with a matrix. This allows a compositional worst-case throughput analysis of the repeated scenarios by raising the matrices to the power of the number of repetitions, which scales logarithmically with the number of repetitions, whereas the existing throughput analysis scales linearly. We moreover provide the first exact worst-case latency analysis for scenario-aware dataflow. This compositional latency analysis also scales logarithmically when applied to multi-scale dataflow models. We apply our new throughput and latency analysis to several realistic applications. The results confirm that our approach provides a fast and accurate analysis.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "80", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Altawy:2018:SLT, author = "Riham Altawy and Raghvendra Rohit and Morgan He and Kalikinkar Mandal and Gangqiang Yang and Guang Gong", title = "{SLISCP-light}: Towards Hardware Optimized Sponge-specific Cryptographic Permutations", journal = j-TECS, volume = "17", number = "4", pages = "81:1--81:??", month = aug, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3233245", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:41 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/hash.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The emerging areas in which highly resource constrained devices are interacting wirelessly to accomplish tasks have led manufacturers to embed communication systems in them. Tiny low-end devices such as sensor networks nodes and Radio Frequency Identification (RFID) tags are of particular importance due to their vulnerability to security attacks, which makes protecting their communication privacy and authenticity an essential matter. In this work, we present a lightweight do-it-all cryptographic design that offers the basic underlying functionalities to secure embedded communication systems in tiny devices. Specifically, we revisit the design approach of the sLiSCP family of lightweight cryptographic permutations, which was proposed in SAC 2017. sLiSCP is designed to be used in a unified duplex sponge construction to provide minimal overhead for multiple cryptographic functionalities within one hardware design. The design of sLiSCP follows a 4-subblock Type-2 Generalized Feistel-like Structure (GFS) with unkeyed round-reduced Simeck as the round function, which are extremely efficient building blocks in terms of their hardware area requirements. 
In SLISCP-light, we tweak the GFS design and turn it into an elegant Partial Substitution-Permutation Network construction, which further reduces the hardware areas of the SLISCP permutations by around 16\% of their original values. The new design also enhances the bit diffusion and algebraic properties of the permutations and enables us to reduce the number of steps, thus achieving a better throughput in both the hashing and authentication modes. We perform a thorough security analysis of the new design with respect to its diffusion, differential and linear, and algebraic properties. For SLISCP-light-192, we report parallel implementation hardware areas of 1,820 (respectively, 1,892)GE in CMOS 65 nm (respectively, 130 nm) ASIC. The areas for SLISCP-light-256 are 2,397 and 2,500GE in CMOS 65 nm and 130 nm ASIC, respectively. Overall, the unified duplex sponge mode of SLISCP-light-192, which provides (authenticated) encryption and hashing functionalities, satisfies the area (1,958GE), power (3.97 $ \mu $W), and throughput (44.4kbps) requirements of passive RFID tags.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "81", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Shukla:2018:ENA, author = "Sandeep K. Shukla", title = "Editorial: Need for Artifact Verified Articles in {{\booktitle{ACM Transactions}}}", journal = j-TECS, volume = "17", number = "5", pages = "82:1--82:??", month = nov, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3282437", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:41 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3282437", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "82", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kalayappan:2018:PAH, author = "Rajshekar Kalayappan and Smruti R. Sarangi", title = "Providing Accountability in Heterogeneous Systems-on-Chip", journal = j-TECS, volume = "17", number = "5", pages = "83:1--83:??", month = nov, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3241048", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:41 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3241048", abstract = "When modern systems-on-chip (SoCs), containing designs from different organizations, miscompute or underperform in the field, discerning the responsible component is a non-trivial task. A perfectly accountable system is one in which the on-chip component at fault is always unambiguously detected. The achievement of accountability can be greatly aided by the collection of runtime information that captures the events in the system that led to the error. Such information collection must be fair and impartial to all parties. In this article, we prove that logging messages communicated between components from different organizations is sufficient to provide accountability, provided the logs are authentic. We then construct a solution based on this premise, with an on-chip trusted auditing system to authenticate the logs. We present a thorough design of the auditing system, and demonstrate that its performance overhead is a mere 0.49\%, and its area overhead is a mere 0.194\% (in a heterogeneous 48 core, 400 mm$^2$ chip). We also demonstrate the viability of this solution using three representative bugs found in popular commercial SoCs.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "83", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Bhuiyan:2018:EER, author = "Ashikahmed Bhuiyan and Zhishan Guo and Abusayeed Saifullah and Nan Guan and Haoyi Xiong", title = "Energy-Efficient Real-Time Scheduling of {DAG} Tasks", journal = j-TECS, volume = "17", number = "5", pages = "84:1--84:??", month = nov, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3241049", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:41 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3241049", abstract = "This work studies energy-aware real-time scheduling of a set of sporadic Directed Acyclic Graph (DAG) tasks with implicit deadlines. While meeting all real-time constraints, we try to identify the best task allocation and execution pattern such that the average power consumption of the whole platform is minimized. To our knowledge, this is the first work that addresses the power consumption issue in scheduling multiple DAG tasks on multi-cores and allows intra-task processor sharing. First, we adapt the decomposition-based framework for federated scheduling and propose an energy-sub-optimal scheduler. Then, we derive an approximation algorithm to identify processors to be merged together for further improvements in energy-efficiency. The effectiveness of the proposed approach is evaluated both theoretically via approximation ratio bounds and also experimentally through simulation study. Experimental results on randomly generated workloads show that our algorithms achieve an energy saving of 60\% to 68\% compared to existing DAG task schedulers.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "84", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wei:2018:SAE, author = "Yi-Hung Wei and Quan Leng and Wei-Ju Chen and Aloysius K. Mok and Song Han", title = "Schedule Adaptation for Ensuring Reliability in {RT-WiFi}-Based Networked Embedded Systems", journal = j-TECS, volume = "17", number = "5", pages = "85:1--85:??", month = nov, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3236011", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:41 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3236011", abstract = "With the ever-growing interests in applying wireless technologies for networked embedded systems to serve as the communication fabric, many real-time wireless technologies have been recently developed to support time-critical sensing and control applications. We proposed in previous work the RT-WiFi protocol that provides real-time high-speed predictable data delivery and enables designs to meet time-critical industrial needs. However, without explicit reliability enforcement mechanisms, our previous RT-WiFi design is either subject to uncontrolled packet loss due to noise and other interferences or may suffer from inefficient communication channel usage. In this article, we explicitly consider interference from both Wi-Fi and non-Wi-Fi based interference sources and propose two sets of effective solutions for reliable data transmissions in RT-WiFi-based networked embedded systems. To improve reliability against general non-Wi-Fi based interference, based on rate adaptation and retransmission techniques, we present an optimal real-time rate adaption algorithm together with a communication link scheduler that has low network management overhead. 
A novel technique called overbooking is introduced to further improve the schedulability of the communication link scheduler while maintaining the required communication reliability. For Wi-Fi-based interference, we present mechanisms that utilize virtual carrier sensing to provide reliable data transmission while co-existing with regular Wi-Fi networks. We have implemented the proposed algorithms in the RT-WiFi network management framework and demonstrated the system performance with a series of experiments.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "85", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Sotiriou-Xanthopoulos:2018:OBV, author = "Efstathios Sotiriou-Xanthopoulos and Leonard Masing and Sotirios Xydis and Kostas Siozios and J{\"u}rgen Becker and Dimitrios Soudris", title = "{OpenCL}-based Virtual Prototyping and Simulation of Many-Accelerator Architectures", journal = j-TECS, volume = "17", number = "5", pages = "86:1--86:??", month = nov, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3242179", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:41 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3242179", abstract = "Heterogeneous architectures featuring multiple hardware accelerators have been proposed as a promising solution for meeting the ever-increasing performance and power requirements of embedded systems. However, the existence of numerous design parameters may result in different architectural schemes and thus in extra design effort. 
To address this issue, OpenCL-based frameworks have been recently utilized for FPGA programming, to enable the portability of a source code to multiple architectures. However, such OpenCL frameworks focus on RTL design, thus not enabling rapid prototyping and abstracted modeling of complex systems. Virtual Prototyping aims to overcome this problem by enabling the system modeling in higher abstraction levels. This article combines the benefits of OpenCL and Virtual Prototyping, by proposing an OpenCL-based prototyping framework for data-parallel many-accelerator systems, which (a) creates a SystemC Virtual Platform from OpenCL, (b) provides a co-simulation environment for the host and the Virtual Platform, (c) offers memory and interconnection models for parallel data processing, and (d) enables the system evaluation with alternative real number representations (e.g., fixed-point or 16-bit floating-point).", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "86", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Sababha:2018:RBF, author = "Belal H. Sababha and Yazan A. Alqudah", title = "A Reconfiguration-Based Fault-Tolerant Anti-Lock Brake-by-Wire System", journal = j-TECS, volume = "17", number = "5", pages = "87:1--87:??", month = nov, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3242178", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:41 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3242178", abstract = "Anti-Lock Braking Systems (ABS) and Brake-by-Wire Systems (BBW) are safety-critical applications by nature. Such systems are required to demonstrate high degrees of dependability. Fault-tolerance is the primary means to achieve dependability at runtime and has been an active research area for decades. 
Fault-tolerance is usually achieved in traditional embedded computing systems through redundancy and voting methods. In such systems, hardware units, actuators, sensors, and communication networks are replicated where special voters vote against faulty units. In addition to traditional hardware and software redundancy, hybrid and reconfiguration-based approaches to fault-tolerance are evolving. In this article, we present a reconfiguration-based fault-tolerant approach to achieve high dependability in ABS BBW braking systems. The proposed architecture makes use of other components of less safety-critical systems to maintain high dependability in the more safety-critical systems. This is achieved by migrating safety-critical software tasks from embedded computer hardware that runs into a malfunction to other embedded computing hardware running less-critical software tasks. Or by using a different configuration in terms of the used speed sensors and type of ABS. The proposed architecture is on average 20\% more reliable than conventional ABS architectures assuming equal reliabilities of different components.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "87", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Jin:2018:PAR, author = "Xi Jin and Nan Guan and Changqing Xia and Jintao Wang and Peng Zeng", title = "Packet Aggregation Real-Time Scheduling for Large-Scale {WIA--PA} Industrial Wireless Sensor Networks", journal = j-TECS, volume = "17", number = "5", pages = "88:1--88:??", month = nov, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3266228", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:41 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3266228", abstract = "The IEC standard WIA-PA is a communication protocol for industrial wireless sensor networks. Its special features, including a hierarchical topology, hybrid centralized-distributed management and packet aggregation make it suitable for large-scale industrial wireless sensor networks. Industrial systems place large real-time requirements on wireless sensor networks. However, the WIA-PA standard does not specify the transmission methods, which are vital to the real-time performance of wireless networks, and little work has been done to address this problem. In this article, we propose a real-time aggregation scheduling method for WIA-PA networks. First, to satisfy the real-time constraints on dataflows, we propose a method that combines the real-time theory with the classical bin-packing method to aggregate original packets into the minimum number of aggregated packets. The simulation results indicate that our method outperforms the traditional bin-packing method, aggregating up to 35\% fewer packets, and improves the real-time performance by up to 10\%. 
Second, to make it possible to solve the scheduling problem of WIA-PA networks using the classical scheduling algorithms, we transform the ragged time slots of WIA-PA networks to a universal model. In the simulation, a large number of WIA-PA networks are randomly generated to evaluate the performances of several real-time scheduling algorithms. By comparing the results, we obtain that the earliest deadline first real-time scheduling algorithm is the preferred method for WIA-PA networks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "88", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Weichslgartner:2018:DTR, author = "Andreas Weichslgartner and Stefan Wildermann and Deepak Gangadharan and Michael Gla{\ss} and J{\"u}rgen Teich", title = "A Design--Time\slash Run-Time Application Mapping Methodology for Predictable Execution Time in {MPSoCs}", journal = j-TECS, volume = "17", number = "5", pages = "89:1--89:??", month = nov, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3274665", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:41 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3274665", abstract = "Executing multiple applications on a single MPSoC brings the major challenge of satisfying multiple quality requirements regarding real-time, energy, and so on. Hybrid application mapping denotes the combination of design-time analysis with run-time application mapping. In this article, we present such a methodology, which comprises a design space exploration coupled with a formal performance analysis. This results in several resource reservation configurations, optimized for multiple objectives, with verified real-time guarantees for each individual application. 
The Pareto-optimal configurations are handed over to run-time management, which searches for a suitable mapping according to this information. To provide any real-time guarantees, the performance analysis needs to be composable and the influence of the applications on each other has to be bounded. We achieve this either by spatial or a novel temporal isolation for tasks and by exploiting composable networks-on-chip (NoCs). With the proposed temporal isolation, tasks of different applications can be mapped to the same resource, while, with spatial isolation, one computing resource can be exclusively used by only one application. The experiments reveal that the success rate in finding feasible application mappings can be increased by the proposed temporal isolation by up to 30\% and energy consumption can be reduced compared to spatial isolation.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "89", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hassan:2018:EID, author = "Mohamed Hassan and Anirudh M. Kaushik and Hiren Patel", title = "Exposing Implementation Details of Embedded {DRAM} Memory Controllers through Latency-based Analysis", journal = j-TECS, volume = "17", number = "5", pages = "90:1--90:??", month = nov, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3274281", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:41 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3274281", abstract = "We explore techniques to reverse-engineer DRAM embedded memory controllers (MCs), including page policies, address mapping, and command arbitration. 
There are several benefits to knowing this information: They allow tightening worst-case bounds of embedded systems and platform-aware optimizations at the operating system, source-code, and compiler levels. We develop a latency-based analysis, which we use to devise algorithms and C programs to extract MC properties. We show the effectiveness of the proposed approach by reverse-engineering the MC details in the XUPV5-LX110T Xilinx platform. Furthermore, to cover a breadth of policies, we use a simulation framework and document our findings.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "90", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Shukla:2019:EES, author = "Sandeep K. Shukla", title = "Editorial: Embedded Security Challenge: Cyber Security Contests in the Embedded Computing Domain", journal = j-TECS, volume = "17", number = "6", pages = "91:1--91:??", month = jan, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3293502", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:42 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "91", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Sun:2019:DOS, author = "Hui Sun and Jianzhong Huang and Xiao Qin and Changsheng Xie", title = "{DLSpace}: Optimizing {SSD} Lifetime via An Efficient Distributed Log Space Allocation Strategy", journal = j-TECS, volume = "17", number = "6", pages = "92:1--92:??", month = jan, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3284749", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:42 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3284749", abstract = "Due to limited numbers of program/erase cycles (i.e., P/Es) of NAND Flash, excessive out-of-place update and erase-before-write operations wear out these P/Es during garbage collections, which adversely shorten solid state disk (i.e., SSD) lifetime. The log space in NAND Flash space of an SSD performs as an updated page's buffer, which lowers garbage-collection frequency while reducing consumption of P/Es to extend SSD lifetime. In this article, we propose DLSpace, a novel distributed log space allocation strategy named distributed log space, which divides log space into block-level log space and page-level log space to significantly optimize SSD lifetime. DLSpace's log page space is dedicated to data pages in a data block. Such log page space only buffers page-update operations in this data block; thereby the use of log blocks for postponing garbage collection delays. DLSpace is conducive to fully utilizing pages in data and log blocks to avoid erasures of blocks with free pages. Consequently, DLSpace decreases write amplification by reducing excessive valid page-rewrite and block-erase operations under random-write-intensive workloads. 
We carried out quantitative research on the extension of SSD lifetime by virtue of three metrics (i.e., write amplification, the number of block-erase operations, and the delay time before the first garbage collection occurring). Experimental results reveal that compared with the existing traditional allocation strategy for log space (i.e., TLSpace), DLSpace reduces write amplification and the number of erase operations by up to 55.2\% and 64.1\% to the most extent, respectively. DLSpace also extends TLSpace's delay time of garbage collections by 73.3\% to optimize SSD lifetime.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "92", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Balsamo:2019:MPN, author = "Domenico Balsamo and Benjamin J. Fletcher and Alex S. Weddell and Giorgos Karatziolas and Bashir M. Al-Hashimi and Geoff V. Merrett", title = "Momentum: Power-neutral Performance Scaling with Intrinsic {MPPT} for Energy Harvesting Computing Systems", journal = j-TECS, volume = "17", number = "6", pages = "93:1--93:??", month = jan, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3281300", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:42 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Recent research has looked to supplement or even replace the batteries in embedded computing systems with energy harvesting, where energy is derived from the device's environment. However, such supplies are generally unpredictable and highly variable, and hence systems typically incorporate large external energy buffers (e.g., supercapacitors) to sustain computation; however, these pose environmental issues and increase system size and cost.
This article proposes Momentum, a general power-neutral methodology, with intrinsic system-wide maximum power point tracking, that can be applied to a wide range of different computing systems, where the system dynamically scales its performance (and hence power consumption) to optimize computational progress depending on the power availability. Momentum enables the system to operate around an efficient operating voltage, maximizing forward application execution, without adding any external tracking or control units. This methodology combines at runtime (1) a hierarchical control strategy that utilizes available power management controls (such as dynamic voltage and frequency scaling, and core hot-plugging) to achieve efficient power-neutral operation; (2) a software-based maximum power point tracking scheme (unlike existing approaches, this does not require any additional hardware), which adapts the system power consumption so that it can work at the optimal operating voltage, considering the efficiency of the entire system rather than just the energy harvester; and (3) experimental validation on two different scales of computing system: a low power microcontroller (operating from the already-present 4.7 $ \mu $F decoupling capacitance) and a multi-processor system-on-chip (operating from 15.4mF added capacitance). Experimental results from both a controlled supply and energy harvesting source show that Momentum operates correctly on both platforms and exhibits improvements in forward application execution of up to 11\% when compared to existing power-neutral approaches and 46\% compared to existing static approaches.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "93", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Sheikh:2019:EEM, author = "Saad Zia Sheikh and Muhammad Adeel Pasha", title = "Energy-Efficient Multicore Scheduling for Hard Real-Time Systems: a Survey", journal = j-TECS, volume = "17", number = "6", pages = "94:1--94:??", month = jan, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3291387", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:42 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "As real-time embedded systems are evolving in scale and complexity, the demand for a higher performance at a minimum energy consumption has become a necessity. Consequently, many embedded systems are now adopting multicore architectures into their design. However, scheduling on multicores is not a trivial task and scheduling to minimize the energy consumption further increases the complexity of the problem. This problem is especially aggravated for hard real-time systems where failure to meet a deadline can be catastrophic. Such scheduling algorithms yearn for a polynomial time complexity for the task-to-core assignment problem with an objective to minimize the overall energy consumption. There is now a trend toward heterogeneous multicores where cores differ in power, performance, and architectural capabilities. The desired performance and energy consumption is attained by assigning a task to the core that is best suited for it. In this article, we present a survey on energy-efficient multicore scheduling algorithms for hard real-time systems. We summarize various algorithms reported in the literature and classify them based on Partitioned, Semi-Partitioned, and Global scheduling techniques for both homogeneous and heterogeneous multicores. 
We also present a detailed discussion on various open issues within this domain.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "94", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Xie:2019:EWA, author = "Guoqi Xie and Gang Zeng and Ryo Kurachi and Hiroaki Takada and Renfa Li and Keqin Li", title = "Exact {WCRT} Analysis for Message-Processing Tasks on Gateway-Integrated In-Vehicle {CAN} Clusters", journal = j-TECS, volume = "17", number = "6", pages = "95:1--95:??", month = jan, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3284178", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:42 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "A typical automotive integrated architecture is a controller area network (CAN) cluster integrated by a central gateway. This study proposes a novel and exact worst-case response time (WCRT) analysis method for message-processing tasks in the gateway. We first propose a round search method to obtain lower bound on response time (LBRT) and upper bound on response time (UBRT), respectively. We then obtain the exact WCRT belonging to the scope of the LBRT and UBRT with an effective non-exhaustive exploration. Experimental results on a real CAN message set reveal that the proposed exact analysis method can reduce 99.99999\% combinations on large-scale CAN clusters.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "95", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Feng:2019:EUH, author = "Zhiwei Feng and Nan Guan and Mingsong Lv and Weichen Liu and Qingxu Deng and Xue Liu and Wang Yi", title = "An Efficient {UAV} Hijacking Detection Method Using Onboard Inertial Measurement Unit", journal = j-TECS, volume = "17", number = "6", pages = "96:1--96:??", month = jan, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3289390", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:42 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3289390", abstract = "With the fast growth of civil drones, their security problems meet significant challenges. A commercial drone may be hijacked by a GPS-spoofing attack for illegal activities, such as terrorist attacks. The target of this article is to develop a technique that only uses onboard gyroscopes to determine whether a drone has been hijacked. Ideally, GPS data and the angular velocities measured by gyroscopes can be used to estimate the acceleration of a drone, which can be further compared with the measurement of the accelerometer to detect whether a drone has been hijacked. However, the detection results may not always be accurate due to some calculation and measurement errors, especially when no hijacking occurs in curve trajectory situations. To overcome this, in this article, we propose a novel and simple method to detect hijacking only based on gyroscopes' measurements and GPS data, without using any accelerometer in the detection procedure. The computational complexity of our method is very low, which is suitable to be implemented in the drones with micro-controllers. 
On the other hand, the proposed method does not rely on any accelerometer to detect attacks, which means it receives less information in the detection procedure and may reduce the results accuracy in some special situations. While the previous method can compensate for this flaw, the high detection results also can be guaranteed by using the above two methods. Experiments with a quad-rotor drone are conducted to show the effectiveness of the proposed method and the combination method.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "96", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Yan:2019:CAR, author = "Yin Yan and Girish Gokul and Karthik Dantu and Steven Y. Ko and Lukasz Ziarek and Jan Vitek", title = "Can {Android} Run on Time? {Extending} and Measuring the {Android} Platform's Timeliness", journal = j-TECS, volume = "17", number = "6", pages = "97:1--97:??", month = jan, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3289257", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:42 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Time predictability is difficult to achieve in the complex, layered execution environments that are common in modern embedded devices such as smartphones. We explore adopting the Android programming model for a range of embedded applications that extends beyond mobile devices, under the constraint that changes to widely used libraries should be minimized. The challenges we explore include the interplay between real-time activities and the rest of the system, how to express the timeliness requirements of components, and how well those requirements can be met on stock embedded platforms. 
We detail the design and implementation of our modifications to the Android framework along with a real-time VM and OS, and we provide experimental data validating feasibility over five applications.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "97", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Abkenar:2019:GRU, author = "Amin B. Abkenar and Seng W. Loke and Arkady Zaslavsky and Wenny Rahayu", title = "{GroupSense}: Recognizing and Understanding Group Physical Activities using Multi-Device Embedded Sensing", journal = j-TECS, volume = "17", number = "6", pages = "98:1--98:??", month = jan, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3295747", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:42 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Human activity recognition using embedded mobile and embedded sensors is becoming increasingly important. Scaling up from individuals to groups, that is, Group Activity Recognition (GAR), has attracted significant attention recently. This article proposes a model and modeling language for GAR called GroupSense-L and a novel distributed middleware called GroupSense for mobile GAR. We implemented and tested GroupSense using smartphone sensors, smartwatch sensors, and embedded sensors in things, where we have a protocol for these different devices to exchange information required for GAR. A range of continuous group activities (from simple to fairly complex) illustrates our approach and demonstrates the feasibility of our model and richness of the proposed specialization. We then conclude with lessons learned for GAR and future work.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "98", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Derler:2019:GES, author = "Patricia Derler and Klaus Schneider and Jean-Pierre Talpin", title = "Guest Editorial: Special Issue of {ACM TECS on the ACM--IEEE International Conference on Formal Methods and Models for System Design (MEMOCODE 2017)}", journal = j-TECS, volume = "18", number = "1", pages = "1:1--1:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3292422", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:42 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3292422", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "1", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Shukla:2019:EHF, author = "Sandeep K. Shukla", title = "Editorial: Human Factors in Embedded Computing", journal = j-TECS, volume = "18", number = "1", pages = "1:1--1:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3302888", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:42 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3302888", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "1e", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Nuzzo:2019:SAG, author = "Pierluigi Nuzzo and Jiwei Li and Alberto L. 
Sangiovanni-Vincentelli and Yugeng Xi and Dewei Li", title = "Stochastic Assume--Guarantee Contracts for Cyber-Physical System Design", journal = j-TECS, volume = "18", number = "1", pages = "2:1--2:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3243216", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:42 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3243216", abstract = "We present an assume-guarantee contract framework for cyber-physical system design under probabilistic requirements. Given a stochastic linear system and a set of requirements captured by bounded Stochastic Signal Temporal Logic (StSTL) contracts, we propose algorithms to check contract compatibility, consistency, and refinement, and generate a sequence of control inputs that satisfies a contract. We leverage encodings of the verification and control synthesis tasks into mixed integer optimization problems, and conservative approximations of probabilistic constraints that produce sound and tractable problem formulations. We illustrate the effectiveness of our approach on three case studies, including the design of controllers for aircraft power distribution networks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "2", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Plassan:2019:MMA, author = "Guillaume Plassan and Katell Morin-Allory and Dominique Borrione", title = "Mining Missing Assumptions from Counter-Examples", journal = j-TECS, volume = "18", number = "1", pages = "3:1--3:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3288759", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:42 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3288759", abstract = "During the formal functional verification of Register-Transfer Level designs, a false failure is often observed. Most of the time, this failure is caused by an underconstrained model. The analysis of the root cause for the verification error and the creation of missing assumptions are a significant time burden. In this article, we present a methodology to automatically mine these missing assumptions from counter-examples. First, multiple counter-examples are generated for the same property. Then, relevant behaviors are mined from the counter-examples. Finally, corresponding assumptions are filtered and a small amount is returned to the user for review.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "3", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Fellner:2019:MBM, author = "Andreas Fellner and Willibald Krenn and Rupert Schlick and Thorsten Tarrach and Georg Weissenbacher", title = "Model-based, Mutation-driven Test-case Generation Via Heuristic-guided Branching Search", journal = j-TECS, volume = "18", number = "1", pages = "4:1--4:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3289256", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:42 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3289256", abstract = "This work introduces a heuristic-guided branching search algorithm for model-based, mutation-driven test-case generation. The algorithm is designed towards the efficient and computationally tractable exploration of discrete, non-deterministic models with huge state spaces. Asynchronous parallel processing is a key feature of the algorithm. The algorithm is inspired by the successful path planning algorithm Rapidly exploring Random Trees (RRT). We adapt RRT in several aspects towards test-case generation. Most notably, we introduce parametrized heuristics for start and successor state selection, as well as a mechanism to construct test cases from the data produced during the search. We implemented our algorithm in the existing test-case generation framework MoMuT. We present an extensive evaluation of the proposed heuristics and parameters of the algorithm, based on a diverse set of demanding models obtained in an industrial context. In total, we continuously utilized 128 CPU cores on three servers for several weeks to gather the experimental data presented. We show that branching search works well and the use of multiple heuristics is justified. 
With our new algorithm, we are now able to process models consisting of over 2,300 concurrent objects. To our knowledge, there is no other mutation-driven test-case generation tool that is able to process models of this magnitude.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "4", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Edwards:2019:CDC, author = "Stephen A. Edwards and Richard Townsend and Martha Barker and Martha A. Kim", title = "Compositional Dataflow Circuits", journal = j-TECS, volume = "18", number = "1", pages = "5:1--5:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3274280", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:42 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3274280", abstract = "We present a technique for implementing dataflow networks as compositional hardware circuits. We first define an abstract dataflow model with unbounded buffers that supports data-dependent blocks (mux, demux, and nondeterministic merge); we then show how to faithfully implement such networks with bounded buffers and handshaking. Handshaking admits compositionality: our circuits can be connected with or without buffers, and combinational cycles arise only from a completely unbuffered cycle. While bounding buffer sizes can cause the system to deadlock prematurely, the system is guaranteed to produce the same, correct, data before then. Thus, unless the system deadlocks, inserting or removing buffers only affects its performance. We demonstrate how this enables design space to be explored.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "5", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Reynolds:2019:MME, author = "Thomas N. Reynolds and Adam Procter and William L. Harrison and Gerard Allwein", title = "The Mechanized Marriage of Effects and Monads with Applications to High-assurance Hardware", journal = j-TECS, volume = "18", number = "1", pages = "6:1--6:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3274282", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:42 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3274282", abstract = "Constructing high-assurance, secure hardware remains a challenge, because to do so relies on both a verifiable means of hardware description and implementation. However, production hardware description languages (HDL) lack the formal underpinnings required by formal methods in security. Still, there is no such thing as high-assurance systems without high-assurance hardware. We present a core calculus of secure hardware description with its formal semantics, security type system, and mechanization in Coq. This calculus is the core of the functional HDL, ReWire, shown in previous work to have useful applications in reconfigurable computing. This work supports a full-fledged, formal methodology for producing high-assurance hardware.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "6", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chattopadhyay:2019:QIL, author = "Sudipta Chattopadhyay and Moritz Beck and Ahmed Rezine and Andreas Zeller", title = "Quantifying the Information Leakage in Cache Attacks via Symbolic Execution", journal = j-TECS, volume = "18", number = "1", pages = "7:1--7:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3288758", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:42 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3288758", abstract = "Cache attacks allow attackers to infer the properties of a secret execution by observing cache hits and misses. But how much information can actually leak through such attacks? For a given program, a cache model, and an input, our CHALICE framework leverages symbolic execution to compute the amount of information that can possibly leak through cache attacks. At the core of CHALICE is a novel approach to quantify information leakage that can highlight critical cache side-channel leakage on arbitrary binary code. In our evaluation on real-world programs from OpenSSL and Linux GDK libraries, CHALICE effectively quantifies information leakage: For an AES-128 implementation on Linux, for instance, CHALICE finds that a cache attack can leak as much as 127 out of 128 bits of the encryption key.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "7", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Park:2019:ERR, author = "Taeju Park and Kang G. 
Shin", title = "{EACAN}: Reliable and Resource-Efficient {CAN} Communications", journal = j-TECS, volume = "18", number = "1", pages = "8:1--8:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3301309", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:42 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3301309", abstract = "Worst-case-based timing verification for the controller area network (CAN) has been the bottleneck to efficient use of its bandwidth. Especially, this inefficiency comes from the worst-case transmission error rate (WCTER) when transmission errors are accounted for. To alleviate this inefficiency, we propose a runtime adaptation scheme, error-adaptive CAN (EACAN). EACAN observes the behavior of transmission errors at runtime, and reconfigures the message period based on the observation to meet the timing-failure requirement. We experimentally evaluate the bandwidth utilization of both EACAN- and WCTER-based verification, showing that the former improves the bandwidth utilization by 14\% over the latter.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "8", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Pederson:2019:BCL, author = "Daniel J. Pederson and Christopher J. Quinkert and Muhammad A. Arafat and Jesse P. Somann and Jack D. Williams and Rebecca A. Bercich and Zhi Wang and Gabriel O. Albors and John G. R. Jefferys and Pedro P. 
Irazoqui", title = "The {Bionode}: a Closed-Loop Neuromodulation Implant", journal = j-TECS, volume = "18", number = "1", pages = "9:1--9:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3301310", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:42 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3301310", abstract = "Implantable closed-loop neuromodulation devices for use in long-term chronic studies in a lab or clinical trial are expensive to acquire and difficult to modify for specific use cases. This article documents the design and fabrication of a wireless implantable device using only commercially available off-the-shelf (COTS) components. This device, called the Bionode, can record and transmit up to four channels of biopotential data while simultaneously providing biphasic constant-current stimulation. The Bionode is a viable, low-cost, reusable, and easily modifiable research tool with clinical implications that has gained widespread use in various research projects at Purdue University.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "9", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Venkataramani:2019:SMM, author = "Vanchinathan Venkataramani and Mun Choon Chan and Tulika Mitra", title = "Scratchpad-Memory Management for Multi-Threaded Applications on Many-Core Architectures", journal = j-TECS, volume = "18", number = "1", pages = "10:1--10:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3301308", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:42 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3301308", abstract = "Contemporary many-core architectures, such as Adapteva Epiphany and Sunway TaihuLight, employ per-core software-controlled Scratchpad Memory (SPM) rather than caches for better performance-per-watt and predictability. In these architectures, a core is allowed to access its own SPM as well as remote SPMs through the Network-On-Chip (NoC). However, the compiler/programmer is required to explicitly manage the movement of data between SPMs and off-chip memory. Utilizing SPMs for multi-threaded applications is even more challenging, as the shared variables across the threads need to be placed appropriately. Accessing variables from remote SPMs with higher access latency further complicates this problem as certain links in the NoC may be heavily contended by multiple threads. Therefore, certain variables may need to be replicated in multiple SPMs to reduce the contention delay and/or the overall access time. We present Coordinated Data Management (CDM), a compile-time framework that automatically identifies shared/private variables and places them with replication (if necessary) to suitable on-chip or off-chip memory, taking NoC contention into consideration. 
We develop both an exact Integer Linear Programming (ILP) formulation as well as an iterative, scalable algorithm for placing the data variables in multi-threaded applications on many-core SPMs. Experimental evaluation on the Parallella hardware platform confirms that our allocation strategy reduces the overall execution time and energy consumption by $ 1.84 \times $ and $ 1.83 \times $, respectively, when compared to the existing approaches.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "10", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Rhisheekesan:2019:CFC, author = "Abhishek Rhisheekesan and Reiley Jeyapaul and Aviral Shrivastava", title = "Control Flow Checking or Not? (for Soft Errors)", journal = j-TECS, volume = "18", number = "1", pages = "11:1--11:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3301311", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:42 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3301311", abstract = "Huge leaps in performance and power improvements of computing systems are driven by rapid technology scaling, but technology scaling has also rendered computing systems susceptible to soft errors. Among the soft error protection techniques, Control Flow Checking (CFC) based techniques have gained a reputation of being lightweight yet effective. The main idea behind CFCs is to check if the program is executing the instructions in the right order. In order to validate the protection claims of existing CFCs, we develop a systematic and quantitative method to evaluate the protection achieved by CFCs using the metric of vulnerability. 
Our quantitative analysis indicates that existing CFC techniques are not only ineffective in providing protection from soft faults, but incur additional performance and power overheads. Our results show that software-only CFC protection schemes increase system vulnerability by 18\%--21\% with 17\%--38\% performance overhead and hybrid CFC protection increases vulnerability by 5\%. Although the vulnerability remains almost the same for hardware-only CFC protection, they incur overheads of design cost, area, and power due to the hardware modifications required for their implementations.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "11", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Roy:2019:CPR, author = "Debapriya Basu Roy and Shivam Bhasin and Ivica Nikoli{\'c} and Debdeep Mukhopadhyay", title = "Combining {PUF} with {RLUTs}: a Two-party Pay-per-device {IP} Licensing Scheme on {FPGAs}", journal = j-TECS, volume = "18", number = "2", pages = "12:1--12:??", month = apr, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3301307", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:43 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3301307", abstract = "With the popularity of modern FPGAs, the business of FPGA specific intellectual properties (IP) is expanding rapidly. This also brings in the concern of IP protection. FPGA vendors are making serious efforts toward IP protection, leading to standardization schemes like IEEE P1735. However, efficient techniques to prevent unauthorized overuse of IP still remain an open question. In this article, we propose a two-party IP protection scheme combining the re-configurable look-up table primitive of modern FPGAs with physically unclonable functions (PUF). 
The proposed scheme works with the assumption that the FPGA vendor provides the assurance of confidentiality and integrity of the developed IP. The proposed scheme is considerably lightweight compared to existing schemes, prevents overuse, and does not involve FPGA vendors or trusted third parties for IP licensing. The validation of the proposed scheme is done on MCNC'91 benchmark and third-party IPs like AES and lightweight MIPS processors.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "12", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhong:2019:SHS, author = "Guanwen Zhong and Akshat Dubey and Cheng Tan and Tulika Mitra", title = "{Synergy}: an {HW\slash SW} Framework for High Throughput {CNNs} on Embedded Heterogeneous {SoC}", journal = j-TECS, volume = "18", number = "2", pages = "13:1--13:??", month = apr, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3301278", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:43 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3301278", abstract = "Convolutional Neural Networks (CNN) have been widely deployed in diverse application domains. There has been significant progress in accelerating both their training and inference using high-performance GPUs, FPGAs, and custom ASICs for datacenter-scale environments. The recent proliferation of mobile and Internet of Things (IoT) devices have necessitated real-time, energy-efficient deep neural network inference on embedded-class, resource-constrained platforms. In this context, we present Synergy, an automated, hardware-software co-designed, pipelined, high-throughput CNN inference framework on embedded heterogeneous system-on-chip (SoC) architectures (Xilinx Zynq). 
Synergy leverages, through multi-threading, all the available on-chip resources, which includes the dual-core ARM processor along with the FPGA and the NEON Single-Instruction Multiple-Data (SIMD) engines as accelerators. Moreover, Synergy provides a unified abstraction of the heterogeneous accelerators (FPGA and NEON) and can adapt to different network configurations at runtime without changing the underlying hardware accelerator architecture by balancing workload across accelerators through work-stealing. Synergy achieves 7.3X speedup, averaged across seven CNN models, over a well-optimized software-only solution. Synergy demonstrates substantially better throughput and energy-efficiency compared to the contemporary CNN implementations on the same SoC architecture.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "13", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Guha:2019:SBS, author = "Krishnendu Guha and Debasri Saha and Amlan Chakrabarti", title = "Stigmergy-Based Security for {SoC} Operations From Runtime Performance Degradation of {SoC} Components", journal = j-TECS, volume = "18", number = "2", pages = "14:1--14:??", month = apr, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3301279", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:43 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3301279", abstract = "The semiconductor design industry of the embedded era has embraced the globalization strategy for system on chip (SoC) design. This involves incorporation of various SoC components or intellectual properties (IPs), procured from various third-party IP (3PIP) vendors. However, trust of an SoC is challenged when a supplied IP is counterfeit or implanted with a Hardware Trojan Horse. 
Both roots of untrust may result in sudden performance degradation at runtime. None of the existing hardware security approaches organize the behavior of the IPs at the low level, to ensure timely completion of SoC operations. However, real-time SoC operations are always associated with a deadline, and a deadline miss due to sudden performance degradation of any of the IPs may jeopardize mission-critical applications. We seek refuge to the stigmergic behavior exhibited in insect colonies to propose a decentralized self-aware security approach. The self-aware security modules attached with each IP works based on the Observe-Decide-Act paradigm and not only detects vulnerability but also organizes behavior of the IPs dynamically at runtime so that the high-level objective of task completion before a deadline is ensured. Experimental validation and low overhead of our proposed security modules over various benchmark IPs and crypto SoCs depict the prospects of our proposed mechanism.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "14", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ahmed:2019:CRU, author = "Alif Ahmed and Yuanwen Huang and Prabhat Mishra", title = "Cache Reconfiguration Using Machine Learning for Vulnerability-aware Energy Optimization", journal = j-TECS, volume = "18", number = "2", pages = "15:1--15:??", month = apr, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3309762", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:43 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3309762", abstract = "Dynamic cache reconfiguration has been widely explored for energy optimization and performance improvement for single-core systems. 
Cache partitioning techniques are introduced for the shared cache in multicore systems to alleviate inter-core interference. While these techniques focus only on performance and energy, they ignore vulnerability due to soft errors. In this article, we present a static profiling based algorithm to enable vulnerability-aware energy-optimization for real-time multicore systems. Our approach can efficiently search the space of cache configurations and partitioning schemes for energy optimization while task deadlines and vulnerability constraints are satisfied. A machine learning technique has been employed to minimize the static profiling time without sacrificing the accuracy of results. Our experimental results demonstrate that our approach can achieve 19.2\% average energy savings compared with the base configuration, while drastically reducing the vulnerability (49.3\% on average) compared to state-of-the-art techniques. Furthermore, the machine learning technique enabled more than 10x speedup in static profiling time with a negligible prediction error of 3\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "15", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lentaris:2019:SMF, author = "George Lentaris and Konstantinos Maragos and Dimitrios Soudris and Xenophon Zabulis and Manolis Lourakis", title = "Single- and Multi-{FPGA} Acceleration of Dense Stereo Vision for Planetary Rovers", journal = j-TECS, volume = "18", number = "2", pages = "16:1--16:??", month = apr, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3312743", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:43 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3312743", abstract = "Increased mobile autonomy is a vital requisite for future planetary exploration rovers. Stereo vision is a key enabling technology in this regard, as it can passively reconstruct in three dimensions the surroundings of a rover and facilitate the selection of science targets and the planning of safe routes. Nonetheless, accurate dense stereo algorithms are computationally demanding. When executed on the low-performance, radiation-hardened CPUs typically installed on rovers, slow stereo processing severely limits the driving speed and hence the science that can be conducted in situ. Aiming to decrease execution time while increasing the accuracy of stereo vision embedded in future rovers, this article proposes HW/SW co-design and acceleration on resource-constrained, space-grade FPGAs. In a top-down approach, we develop a stereo algorithm based on the space sweep paradigm, design its parallel HW architecture, implement it with VHDL, and demonstrate feasible solutions even on small-sized devices with our multi-FPGA partitioning methodology. 
To meet all cost, accuracy, and speed requirements set by the European Space Agency for this system, we customize our HW/SW co-processor by design space exploration and testing on a Mars-like dataset. Implemented on Xilinx Virtex technology, or European NG-MEDIUM devices, the FPGA kernel processes a $ 1{,}120 \times 1{,}120 $ stereo pair in 1.7s--3.1s, utilizing only 5.4--9.3 LUT6 and 200--312 RAMB18. The proposed system exhibits up to $ 32 \times $ speedup over desktop CPUs, or $ 2{,}810 \times $ over space-grade LEON3, and achieves a mean reconstruction error less than 2cm up to 4m depth. Excluding errors exceeding 2cm (which are less than 4\% of the total), the mean error is under 8mm.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "16", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Harb:2019:FIE, author = "Salah Harb and Moath Jarrah", title = "{FPGA} Implementation of the {ECC} Over {$ {\rm GF}(2^m) $} for Small Embedded Applications", journal = j-TECS, volume = "18", number = "2", pages = "17:1--17:??", month = apr, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3310354", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:43 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3310354", abstract = "In this article, we propose a compact elliptic curve cryptographic core over GF($ 2^m$). The proposed architecture is based on the Lopez-Dahab projective point arithmetic operations. To achieve efficiency in resources usage, an iterative method that uses a ROM-based state machine is developed for the elliptic curve cryptography (ECC) point doubling and addition operations. The compact ECC core has been implemented using Virtex FPGA devices. 
The number of the required slices is 2,102 at 321MHz and 6,738 slices at 262MHz for different GF($ 2^m$). Extensive experiments were conducted to compare our solution to existing methods in the literature. Our compact core consumes less area than all previously proposed methods. It also provides an excellent performance for scalar multiplication. In addition, the ECC core is implemented in ASIC 0.18 $ \mu $m CMOS technology, and the results show excellent performance. Therefore, our proposed ECC core method provides a balance in terms of speed, area, and power consumption. This makes the proposed design the right choice for cryptosystems in limited-resource devices such as cell phones, IP cores of SoCs, and smart cards. Moreover, side-channel attack resistance is implemented to prevent power analysis.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "17", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Arghavani:2019:CLB, author = "Abbas Arghavani and Haibo Zhang and Zhiyi Huang and Yawen Chen", title = "{Chimp}: a Learning-based Power-aware Communication Protocol for Wireless Body Area Networks", journal = j-TECS, volume = "18", number = "2", pages = "18:1--18:??", month = apr, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3309763", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:43 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3309763", abstract = "Radio links in wireless body area networks (WBANs) commonly experience highly time-varying attenuation due to the dynamic network topology and frequent occlusions caused by body movements, making it challenging to design a reliable, energy-efficient, and real-time communication protocol for WBANs. 
In this article, we present Chimp, a learning-based power-aware communication protocol in which each sending node can self-learn the channel quality and choose the best transmission power level to reduce energy consumption and interference range while still guaranteeing high communication reliability. Chimp is designed based on learning automata that uses only the acknowledgment packets and motion data from a local gyroscope sensor to infer the real-time channel status. We design a new cost function that takes into account the energy consumption, communication reliability and interference and develop a new learning function that can guarantee to select the optimal transmission power level to minimize the cost function for any given channel quality. For highly dynamic postures such as walking and running, we exploit the correlation between channel quality and motion data generated by a gyroscope sensor to fastly estimate channel quality, eliminating the need to use expensive channel sampling procedures. We evaluate the performance of Chimp through experiments using TelosB motes equipped with the MPU-9250 motion sensor chip and compare it with the state-of-the-art protocols in different body postures. Experimental results demonstrate that Chimp outperforms existing schemes and works efficiently in most common body postures. In high-data-rate scenarios, it achieves almost the same performance as the optimal power assignment scheme in which the optimal power level for each transmission is calculated based on the collected channel measurements in an off-line manner.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "18", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Jiang:2019:BSR, author = "Zhe Jiang and Neil Audsley and Pan Dong", title = "{BlueIO}: a Scalable Real-Time Hardware {I/O} Virtualization System for Many-core Embedded Systems", journal = j-TECS, volume = "18", number = "3", pages = "19:1--19:??", month = jun, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3309765", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:43 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3309765", abstract = "In safety-critical systems, time predictability is vital. This extends to I/O operations that require predictability, timing-accuracy, parallel access, scalability, and isolation. Currently, existing approaches cannot achieve all these requirements at the same time. In this article, we propose a framework of hardware framework for real-time I/O virtualization---termed BlueIO---to meet all these requirements simultaneously. BlueIO integrates the functionalities of I/O virtualization, low-layer I/O drivers, and a clock cycle level timing-accurate I/O controller (using the GPIOCP [36]). BlueIO provides this functionality in the hardware layer, supporting abstract virtualized access to I/O from the software domain. The hardware implementation includes I/O virtualization and I/O drivers, provides isolation and parallel (concurrent) access to I/O operations, and improves I/O performance. Furthermore, the approach includes the previously proposed GPIOCP to guarantee that I/O operations will occur at a specific clock cycle (i.e., be timing-accurate and predictable). 
In this article, we present a hardware consumption analysis of BlueIO to show that it linearly scales with the number of CPUs and I/O devices, which is evidenced by our implementation in VLSI and FPGA. We also describe the design and implementation of BlueIO and demonstrate how a BlueIO-based system can be exploited to meet real-time requirements with significant improvements in I/O performance and a low running cost on different OSs.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "19", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Shukla:2019:ERH, author = "Sandeep K. Shukla", title = "Editorial: Reflections on the History of Cyber-Physical versus Embedded Systems", journal = j-TECS, volume = "18", number = "3", pages = "19:1--19:??", month = jun, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3325115", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:43 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3325115", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "19e", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Tabrizi:2019:DLC, author = "Farid Molazem Tabrizi and Karthik Pattabiraman", title = "Design-Level and Code-Level Security Analysis of {IoT} Devices", journal = j-TECS, volume = "18", number = "3", pages = "20:1--20:??", month = jun, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3310353", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:43 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3310353", abstract = "The Internet of Things (IoT) is playing an important role in different aspects of our lives. Smart grids, smart cars, and medical devices all incorporate IoT devices as key components. The ubiquity and criticality of these devices make them an attractive target for attackers. Therefore, we need techniques to analyze their security so that we can address their potential vulnerabilities. IoT devices, unlike remote servers, are user-facing and, therefore, an attacker may interact with them more extensively, e.g., via physical access. Existing techniques for analyzing security of IoT devices either rely on a pre-defined set of attacks and, therefore, have limited effect or do not consider the specific capabilities the attackers have against IoT devices. Security analysis techniques may operate at the design-level, leveraging abstraction to avoid state-space explosion, or at the code-level for ensuring accuracy. In this article, we introduce two techniques, one at the design-level, and the other at the code-level, to analyze security of IoT devices, and compare their effectiveness. The former technique uses model checking, while the latter uses symbolic execution, to find attacks based on the attacker's capabilities. We evaluate our techniques on an open source smart meter. 
We find that our code-level analysis technique is able to find three times more attacks and complete the analysis in half the time, compared to the design-level analysis technique, with no false positives.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput. Syst.",
  articleno = "20",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Belson:2019:SAP,
  author = "Bruce Belson and Jason Holdsworth and Wei Xiang and Bronson Philippa",
  title = "A Survey of Asynchronous Programming Using Coroutines in the {Internet of Things} and Embedded Systems",
  journal = j-TECS,
  volume = "18",
  number = "3",
  pages = "21:1--21:??",
  month = jun,
  year = "2019",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3319618",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Thu Oct 17 18:16:43 MDT 2019",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL = "https://dl.acm.org/ft_gateway.cfm?id=3319618",
  abstract = "Many Internet of Things and embedded projects are event driven, and therefore require asynchronous and concurrent programming. Current proposals for C++20 suggest that coroutines will have native language support. It is timely to survey the current use of coroutines in embedded systems development. This article investigates existing research which uses or describes coroutines on resource-constrained platforms. The existing research is analysed with regard to: software platform, hardware platform, and capacity; use cases and intended benefits; and the application programming interface design used for coroutines. A systematic mapping study was performed, to select studies published between 2007 and 2018 which contained original research into the application of coroutines on resource-constrained platforms.
An initial set of 566 candidate papers, collated from on-line databases, were reduced to only 35 after filters were applied, revealing the following taxonomy. The C \& C++ programming languages were used by 22 studies out of 35. As regards hardware, 16 studies used 8- or 16-bit processors while 13 used 32-bit processors. The four most common use cases were concurrency (17 papers), network communication (15), sensor readings (9), and data flow (7). The leading intended benefits were code style and simplicity (12 papers), scheduling (9), and efficiency (8). A wide variety of techniques have been used to implement coroutines, including native macros, additional tool chain steps, new language features, and non-portable assembly language. We conclude that there is widespread demand for coroutines on resource-constrained devices. Our findings suggest that there is significant demand for a formalised, stable, well-supported implementation of coroutines in C++, designed with consideration of the special needs of resource-constrained devices, and further that such an implementation would bring benefits specific to such devices.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput. Syst.",
  articleno = "21",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Batina:2019:ISI,
  author = "Lejla Batina and Sherman S. M.
Chow and Gerhard Hancke and Zhe Liu",
  title = "Introduction to the Special Issue on Cryptographic Engineering for {Internet of Things}: Security Foundations, Lightweight Solutions, and Attacks",
  journal = j-TECS,
  volume = "18",
  number = "3",
  pages = "22:1--22:??",
  month = jun,
  year = "2019",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3322641",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Thu Oct 17 18:16:43 MDT 2019",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL = "https://dl.acm.org/ft_gateway.cfm?id=3322641",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput. Syst.",
  articleno = "22",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Zhou:2019:LIN,
  author = "Lu Zhou and Chunhua Su and Zhi Hu and Sokjoon Lee and Hwajeong Seo",
  title = "Lightweight Implementations of {NIST P-256} and {SM2 ECC} on $8$-bit Resource-Constraint Embedded Device",
  journal = j-TECS,
  volume = "18",
  number = "3",
  pages = "23:1--23:??",
  month = jun,
  year = "2019",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3236010",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Thu Oct 17 18:16:43 MDT 2019",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL = "https://dl.acm.org/ft_gateway.cfm?id=3236010",
  abstract = "Elliptic Curve Cryptography (ECC) now is one of the most important approach to instantiate asymmetric encryption and signature schemes, which has been extensively exploited to protect the security of cyber-physical systems. With the advent of the Internet of Things (IoT), a great deal of constrained devices may require software implementations of ECC operations.
Under this circumstances, the SM2, a set of public key cryptographic algorithms based on elliptic curves published by Chinese Commercial Cryptography Administration Office, was standardized at ISO in 2017 to enhance the cyber-security. However, few research works on the implementation of SM2 for constrained devices have been conducted. In this work, we fill this gap and propose our efficient, secure, and compact implementation of scalar multiplication on a 256-bit elliptic curve recommended by the SM2, as well as a comparison implementation of scalar multiplication on the same bit-length elliptic curve recommended by NIST. We re-design some existent techniques to fit the low-end IoT platform, namely 8-bit AVR processors, and our implementations evaluated on the desired platform show that the SM2 algorithms have competitive efficiency and security with NIST, which would work well to secure the IoT world.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput. Syst.",
  articleno = "23",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Turan:2019:CFF,
  author = "Furkan Turan and Ingrid Verbauwhede",
  title = "Compact and Flexible {FPGA} Implementation of {Ed25519} and {X25519}",
  journal = j-TECS,
  volume = "18",
  number = "3",
  pages = "24:1--24:??",
  month = jun,
  year = "2019",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3312742",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Thu Oct 17 18:16:43 MDT 2019",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL = "https://dl.acm.org/ft_gateway.cfm?id=3312742",
  abstract = "This article describes a field-programmable gate array (FPGA) cryptographic architecture, which combines the elliptic curve--based Ed25519 digital signature algorithm and the X25519 key establishment scheme in a single module.
Cryptographically, these are high-security elliptic curve cryptography algorithms with short key sizes and impressive execution times in software. Our goal is to provide a lightweight FPGA module that enables them on resource-constrained devices, specifically for Internet of Things (IoT) applications. In addition, we aim at extensibility with customisable countermeasures against timing and differential power analysis side-channel attacks and fault-injection attacks. For the former, we offer a choice between time-optimised versus constant-time execution, with or without Z-coordinate randomisation and base-point blinding; and for the latter, we offer enabling or disabling default-case statements in the Finite State Machine (FSM) descriptions. To obtain compactness and at the same time fast execution times, we make maximum use of the Digital Signal Processing (DSP) slices on the FPGA. We designed a single arithmetic unit that is flexible to support operations with two moduli and non-modulus arithmetic. In addition, our design benefits in-place memory management and the local storage of inputs into DSP slices' pipeline registers and takes advantage of distributed memory. These eliminate a memory access bottleneck. The flexibility is offered by a micro-code supported instruction-set architecture. Our design targets 7-Series Xilinx FPGAs and is prototyped on a Zynq System-on-Chip (SoC). The base design combining Ed25519 and X25519 in a single module, and its implementation requires only around 11.1K Lookup Tables (LUTs), 2.6K registers, and 16 DSP slices. Also, it achieves performance of 1.6ms for a signature generation and 3.6ms for a signature verification for a 1024-bit message with an 82MHz clock. Moreover, the design can be optimised only for X25519, which gives the most compact FPGA implementation compared to previously published X25519 implementations.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput.
Syst.",
  articleno = "24",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Liu:2019:XBL,
  author = "Weiqiang Liu and Lei Zhang and Zhengran Zhang and Chongyan Gu and Chenghua Wang and Maire O'Neill and Fabrizio Lombardi",
  title = "{XOR}-Based Low-Cost Reconfigurable {PUFs} for {IoT} Security",
  journal = j-TECS,
  volume = "18",
  number = "3",
  pages = "25:1--25:??",
  month = jun,
  year = "2019",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3274666",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Thu Oct 17 18:16:43 MDT 2019",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL = "https://dl.acm.org/ft_gateway.cfm?id=3274666",
  abstract = "With the rapid development of the Internet of Things (IoT), security has attracted considerable interest. Conventional security solutions that have been proposed for the Internet based on classical cryptography cannot be applied to IoT nodes as they are typically resource-constrained. A physical unclonable function (PUF) is a hardware-based security primitive and can be used to generate a key online or uniquely identify an integrated circuit (IC) by extracting its internal random differences using so-called challenge-response pairs (CRPs). It is regarded as a promising low-cost solution for IoT security. A logic reconfigurable PUF (RPUF) is highly efficient in terms of hardware cost. This article first presents a new classification for RPUFs, namely circuit-based RPUF (C-RPUF) and algorithm-based RPUF (A-RPUF); two Exclusive OR (XOR)-based RPUF circuits (an XOR-based reconfigurable bistable ring PUF (XRBR PUF) and an XOR-based reconfigurable ring oscillator PUF (XRRO PUF)) are proposed. Both the XRBR and XRRO PUFs are implemented on Xilinx Spartan-6 field-programmable gate arrays (FPGAs).
The implementation results are compared with previous PUF designs and show good uniqueness and reliability. Compared to conventional PUF designs, the most significant advantage of the proposed designs is that they are highly efficient in terms of hardware cost. Moreover, the XRRO PUF is the most efficient design when compared with previous RPUFs. Also, both the proposed XRRO and XRBR PUFs require only 12.5\% of the hardware resources of previous bistable ring PUFs and reconfigurable RO PUFs, respectively, to generate a 1-bit response. This confirms that the proposed XRBR and XRRO PUFs are very efficient designs with good uniqueness and reliability.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput. Syst.",
  articleno = "25",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Lee:2019:ESA,
  author = "Robert P. Lee and Konstantinos Markantonakis and Raja Naeem Akram",
  title = "Ensuring Secure Application Execution and Platform-Specific Execution in Embedded Devices",
  journal = j-TECS,
  volume = "18",
  number = "3",
  pages = "26:1--26:??",
  month = jun,
  year = "2019",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3284361",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Thu Oct 17 18:16:43 MDT 2019",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL = "https://dl.acm.org/ft_gateway.cfm?id=3284361",
  abstract = "The Internet of Things (IoT) is expanding at a large rate, with devices found in commercial and domestic settings from industrial sensors to home appliances. However, as the IoT market grows, so does the number of attacks made against it with some reports claiming an increase of 600\% in 2017. This work seeks to prevent code replacement, injection, and exploitation attacks by ensuring correct and platform specific application execution.
This combines two previously studied problems: secure application execution and binding hardware and software. We present descriptions of both problems and requirements for ensuring both simultaneously. We then propose a scheme extending previous work that meets these requirements, and describe our implementation of the soft-core Secure Execution Processor developed and tested on Xilinx Spartan-6 FPGA. Finally, we analyse the scheme and our implementation according to performance and the requirements listed.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput. Syst.",
  articleno = "26",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Cherif:2019:LSD,
  author = "Amina Cherif and Malika Belkadi and Damien Sauveron",
  title = "A Lightweight and Secure Data Collection Serverless Protocol Demonstrated in an Active {RFIDs} Scenario",
  journal = j-TECS,
  volume = "18",
  number = "3",
  pages = "27:1--27:??",
  month = jun,
  year = "2019",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3274667",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Thu Oct 17 18:16:43 MDT 2019",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL = "https://dl.acm.org/ft_gateway.cfm?id=3274667",
  abstract = "In the growing Internet of Things context, thousands of computing devices with various functionalities are producing data (from environmental sensors or other sources). However, they are also collecting, storing, processing and transmitting data to eventually communicate them securely to third parties (e.g., owners of devices or cloud data storage). The deployed devices are often battery-powered mobile or static nodes equipped with sensors and/or actuators, and they communicate using wireless technologies. Examples include unmanned aerial vehicles, wireless sensor nodes, smart beacons, and wearable health objects.
Such resource-constrained devices include Active Radio Frequency IDentification (RFID) nodes, and these are used to illustrate our proposal. In most scenarios, these nodes are unattended in an adverse environment, so data confidentiality must be ensured from the sensing phase through to delivery to authorized entities: in other words, data must be securely stored and transmitted to prevent attack by active adversaries even if the nodes are captured. However, due to the scarce resources available to nodes in terms of energy, storage, and/or computation, the proposed security solution has to be lightweight. In this article, we propose a serverless protocol to enable Mobile Data Collectors (MDCs), such as drones, to securely collect data from mobile and static Active RFID nodes and then deliver them later to an authorized third party. The whole solution ensures data confidentiality at each step (from the sensing phase, before data collection by the MDC, once data have been collected by MDC, and during final delivery), while fulfilling the lightweight requirements for the resource-limited entities involved. To assess the suitability of the protocol against the performance requirements, it was implemented on the most resource-constrained devices to get the worst possible results. In addition, to prove the protocol fulfills the security requirements, it was analyzed using security games and also formally verified using the AVISPA and ProVerif tools.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput.
Syst.",
  articleno = "27",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Zhou:2019:LCP,
  author = "Lu Zhou and Chunhua Su and Kuo-Hui Yeh",
  title = "A Lightweight Cryptographic Protocol with Certificateless Signature for the {Internet of Things}",
  journal = j-TECS,
  volume = "18",
  number = "3",
  pages = "28:1--28:??",
  month = jun,
  year = "2019",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3301306",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Thu Oct 17 18:16:43 MDT 2019",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL = "https://dl.acm.org/ft_gateway.cfm?id=3301306",
  abstract = "The universality of smart-devices has brought rapid development and the significant advancement of ubiquitous applications for the Internet of Things (IoT). Designing new types of IoT-compatible cryptographic protocols has become a more popular way to secure IoT-based applications. Significant attention has been dedicated to the challenge of implementing a lightweight and secure cryptographic protocol for IoT devices. In this study, we propose a lightweight cryptographic protocol integrating certificateless signature and bilinear pairing crypto-primitives. In the proposed protocol, we elegantly refine the processes to account for computation-limited IoT devices during security operations. Rigorous security analyses are conducted to guarantee the robustness of the proposed cryptographic protocol. In addition, we demonstrate a thorough performance evaluation, where an IoT-based test-bed, i.e., the Raspberry PI, is simulated as the underlying platform of the implementation of our proposed cryptographic protocol. The results show the practicability of the proposed protocol.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput.
Syst.",
  articleno = "28",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Sha:2019:CED,
  author = "Le-Tian Sha and Fu Xiao and Hai-Ping Huang and Yu Chen and Ru-Chuan Wang",
  title = "Catching Escapers: a Detection Method for Advanced Persistent Escapers in Industry {Internet of Things} Based on Identity-based Broadcast Encryption {(IBBE)}",
  journal = j-TECS,
  volume = "18",
  number = "3",
  pages = "29:1--29:??",
  month = jun,
  year = "2019",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3319615",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Thu Oct 17 18:16:43 MDT 2019",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  URL = "https://dl.acm.org/ft_gateway.cfm?id=3319615",
  abstract = "As the Industry 4.0 or Internet of Things (IoT) era begins, security plays a key role in the Industry Internet of Things (IIoT) due to various threats, which include escape or Distributed Denial of Service (DDoS) attackers in the virtualization layer and vulnerability exploiters in the device layer. A successful cross-VM escape attack in the virtualization layer combined with cross-layer penetration in the device layer, which we define as an Advanced Persistent Escaper (APE), poses a great threat. Therefore, the development of detection and rejection methods for APEs across multiple layers in IIoT is an open issue. To the best of our knowledge, less effective methods are established, especially for vulnerability exploitation in the virtualization layer and backdoor leverage in the device layer. On the basis of this, we propose Escaper Cops (EscaperCOP), a detection method for cross-VM escapers in the virtualization layer and cross-layer penetrators in the device layer.
In particular, a new detection method for guest-to-host escapers is proposed for the virtualization layer. Finally, a novel encryption method based on Identity-based Broadcast Encryption (IBBE) is proposed to protect the critical components in EscaperCOP, detection library, and control command library. To verify our method, experimental tests are performed for a large number of APEs in an IIoT framework. The test results have demonstrated the proposed method is effective with an acceptable level of detection ratio.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput. Syst.",
  articleno = "29",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Ahmed:2019:OPM,
  author = "Rehan Ahmed and Bernhard Buchli and Stefan Draskovic and Lukas Sigrist and Pratyush Kumar and Lothar Thiele",
  title = "Optimal Power Management with Guaranteed Minimum Energy Utilization for Solar Energy Harvesting Systems",
  journal = j-TECS,
  volume = "18",
  number = "4",
  pages = "30:1--30:??",
  month = aug,
  year = "2019",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3317679",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Thu Oct 17 18:16:43 MDT 2019",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL = "https://dl.acm.org/ft_gateway.cfm?id=3317679",
  abstract = "In this work, we present a formal study on optimizing the energy consumption of energy harvesting embedded systems. To deal with the uncertainty inherent in solar energy harvesting systems, we propose the Stochastic Power Management (SPM) scheme, which builds statistical models of harvested energy based on historical data. The proposed stochastic scheme maximizes the lowest energy consumption across all time intervals while giving strict probabilistic guarantees on not encountering battery depletion.
For situations where historical data is not available, we propose the use of (i) a Finite Horizon Control (FHC) scheme and (ii) a non-uniformly scaled energy estimator based on an astronomical model, which is used by FHC. Under certain realistic assumptions, the FHC scheme can provide guarantees on minimum energy usage that can be supported over all times. We further propose and evaluate a piece-wise linear approximation of FHC for efficient implementation in resource-constrained embedded systems. With extensive experimental evaluation for eight publicly available datasets and two datasets collected with our own deployments, we quantitatively establish that the proposed solutions are highly effective at providing a guaranteed minimum service level and significantly outperform existing solutions.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput. Syst.",
  articleno = "30",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Shukla:2019:EAR,
  author = "Sandeep K. Shukla",
  title = "Editorial: Adversaries and Robustness",
  journal = j-TECS,
  volume = "18",
  number = "4",
  pages = "30:1--30:??",
  month = aug,
  year = "2019",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3345556",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Thu Oct 17 18:16:43 MDT 2019",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL = "https://dl.acm.org/ft_gateway.cfm?id=3345556",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput.
Syst.",
  articleno = "30e",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Liu:2019:CDM,
  author = "Daibo Liu and Zhichao Cao and Mingyan Liu and Mengshu Hou and Hongbo Jiang",
  title = "Contention-Detectable Mechanism for Receiver-Initiated {MAC}",
  journal = j-TECS,
  volume = "18",
  number = "4",
  pages = "31:1--31:??",
  month = aug,
  year = "2019",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3317683",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Thu Oct 17 18:16:43 MDT 2019",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL = "https://dl.acm.org/ft_gateway.cfm?id=3317683",
  abstract = "The energy efficiency and delivery robustness are two critical issues for low duty-cycled wireless sensor networks. The asynchronous receiver-initiated duty-cycling media access control (MAC) protocols have shown their effectiveness through various studies. In receiver-initiated MACs, packet transmission is triggered by the probe of receiver. However, it suffers from the performance degradation incurred by packet collision, especially under bursty traffic. Several protocols have been proposed to address this problem, but their performance is restricted by the unnecessary backoff time and long negotiation process. In this article, we present CD-MAC, an energy-efficient and robust contention-detectable mechanism for addressing the collision-catching problem in receiver-initiated MACs. By exploring the temporal diversity of the acknowledgments, a receiver recognizes the potential senders and subsequently polls individual senders one by one. On that basis, CD-MAC can successfully avoid packet collision even though multiple senders have data packets to transmit to the same receiver. We implement CD-MAC in TinyOS and evaluate its performance on an indoor testbed with single-hop and multi-hop network scenarios.
The results show that CD-MAC can significantly improve throughput by 1.72 times compared with the state-of-the-art receiver-initiated MAC protocol under bursty traffic loads. The results also demonstrate that CD-MAC can effectively mitigate the influence of hidden terminal problem and adapt to network dynamics well.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput. Syst.",
  articleno = "31",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Wang:2019:NNA,
  author = "Xiaokang Wang and Laurence T. Yang and Hongguo Li and Man Lin and Jianjun Han and Bernady O. Apduhan",
  title = "{NQA}: a Nested Anti-collision Algorithm for {RFID} Systems",
  journal = j-TECS,
  volume = "18",
  number = "4",
  pages = "32:1--32:??",
  month = aug,
  year = "2019",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3330139",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Thu Oct 17 18:16:43 MDT 2019",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL = "https://dl.acm.org/ft_gateway.cfm?id=3330139",
  abstract = "Radio frequency identification (RFID) systems, as one of the key components in the Internet of Things (IoT), have attracted much attention in the domains of industry and academia. In practice, the performance of RFID systems rather relies on the effectiveness and efficiency of anti-collision algorithms. A large body of studies have recently focused on the anti-collision algorithms, such as the Q-algorithm (QA), which has been successfully utilized in EPCglobal Class-1 Generation-2 protocol. However, the performance of those anti-collision algorithms needs to be further improved. Observe that fully exploiting the pre-processing time can improve the efficiency of the QA algorithm.
With an objective of improving the performance for anti-collision, we propose a Nested Q-algorithm (NQA), which makes full use of such pre-processing time and incorporates the advantages of both Binary Tree (BT) algorithm and QA algorithm. Specifically, based on the expected number of collision tags, the NQA algorithm can adaptively select either BT or QA to identify collision tags. Extensive simulation results validate the efficiency and effectiveness of our proposed NQA (i.e., less running time for processing the same number of active tags) when compared to the existing algorithms.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput. Syst.",
  articleno = "32",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Su:2019:TFR,
  author = "Fang Su and Yongpan Liu and Xiao Sheng and Hyung Gyu Lee and Naehyuck Chang and Huazhong Yang",
  title = "A Task Failure Rate Aware Dual-Channel Solar Power System for Nonvolatile Sensor Nodes",
  journal = j-TECS,
  volume = "18",
  number = "4",
  pages = "33:1--33:??",
  month = aug,
  year = "2019",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3320270",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Thu Oct 17 18:16:43 MDT 2019",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL = "https://dl.acm.org/ft_gateway.cfm?id=3320270",
  abstract = "In line with the rapid development of the Internet of Things (IoT), the maintenance of on-board batteries for a trillion sensor nodes has become prohibitive both in time and costs. Energy harvesting is a promising solution to this problem. However, conventional energy-harvesting systems with storage suffer from low efficiency because of conversion loss and storage leakage. Direct supply systems without energy buffer provide higher efficiency, but fail to satisfy quality of service (QoS) due to mismatches between input power and workloads.
Recently, a novel dual-channel photovoltaic power system has paved the way to achieve both high energy efficiency and QoS guarantee. This article focuses on the design-time and run-time co-optimization of the dual-channel solar power system. At the design stage, we develop a task failure rate estimation framework to balance design costs and failure rate. At run-time, we propose a task failure rate aware QoS tuning algorithm to further enhance energy efficiency. Through the experiments on both a simulation platform and a prototype board, this study demonstrates a 27\% task failure rate reduction compared with conventional architectures with identical design costs. And the proposed online QoS tuning algorithm brings up to 30\% improvement in energy efficiency with nearly zero failure rate penalty.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput. Syst.",
  articleno = "33",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Ponugoti:2019:EFH,
  author = "Mounika Ponugoti and Aleksandar Milenkovic",
  title = "Enabling On-the-Fly Hardware Tracing of Data Reads in Multicores",
  journal = j-TECS,
  volume = "18",
  number = "4",
  pages = "34:1--34:??",
  month = aug,
  year = "2019",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3322642",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Thu Oct 17 18:16:43 MDT 2019",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL = "https://dl.acm.org/ft_gateway.cfm?id=3322642",
  abstract = "Software debugging is one of the most challenging aspects of embedded system development due to growing hardware and software complexity, limited visibility of system components, and tightening time-to-market. To find software bugs faster, developers often rely on on-chip trace modules with large buffers to capture program execution traces with minimum interference with program execution.
However, the high volumes of trace data and the high cost of trace modules limit the visibility into the system operation to short program segments. This article introduces a new hardware/software technique for capturing and filtering read data value traces in multicores that enables a complete reconstruction of parallel program execution. The proposed technique exploits tracking of data reads in data caches and cache coherence protocol states to minimize the number of trace messages streamed out of the target platform to the software debugger. The effectiveness of the proposed technique is determined by analyzing the required trace port bandwidth and trace buffer sizes as a function of the data cache size and the number of processor cores. The results show that the proposed technique significantly reduces the required trace port bandwidth, from 12.2 to 73.9 times, when compared to the Nexus-like read data value tracing, thus enabling continuous on-the-fly data tracing at modest hardware cost.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Embed. Comput. Syst.",
  articleno = "34",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "https://dl.acm.org/loi/tecs",
}

@Article{Al-bayati:2019:PSD,
  author = "Zaid Al-bayati and Youcheng Sun and Haibo Zeng and Marco {Di Natale} and Qi Zhu and Brett H. Meyer",
  title = "Partitioning and Selection of Data Consistency Mechanisms for Multicore Real-Time Systems",
  journal = j-TECS,
  volume = "18",
  number = "4",
  pages = "35:1--35:??",
  month = aug,
  year = "2019",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3320271",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Thu Oct 17 18:16:43 MDT 2019",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL = "https://dl.acm.org/ft_gateway.cfm?id=3320271",
  abstract = "Multicore platforms are becoming increasingly popular in real-time systems.
One of the major challenges in designing multicore real-time systems is ensuring consistent and timely access to shared resources. Lock-based protection mechanisms such as MPCP and MSRP have been proposed to guarantee mutually exclusive access in multicore systems at the expense of blocking. In this article, we consider partitioning and scheduling in multicore real-time systems with resource sharing. We first propose a resource-aware task partitioning algorithm for systems with lock-based protection. Wait-free methods, which ensure consistent access to shared memory resources with negligible blocking at the expense of additional memory space, are a suitable alternative when the shared resource is a communication buffer. We propose several approaches to solve the joint problem of task partitioning and the selection of a data consistency mechanism (lock-based or wait-free). The problem is first formulated as an Integer Linear Programming (ILP). For large systems where an ILP solution is not scalable, we propose two heuristic algorithms. Experimental results compare the effectiveness of the proposed approaches in finding schedulable systems with low memory cost and show how the use of wait-free methods can significantly improve schedulability.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "35", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Desirena-Lopez:2019:TAR, author = "G. Desirena-L{\'o}pez and A. Ram{\'\i}rez-Trevi{\~n}o and J. L. Briz and C. R. V{\'a}zquez and D. 
G{\'o}mez-Guti{\'e}rrez", title = "Thermal-aware Real-time Scheduling Using Timed Continuous {Petri} Nets", journal = j-TECS, volume = "18", number = "4", pages = "36:1--36:??", month = aug, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3322643", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:43 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3322643", abstract = "We present a thermal-aware, hard real-time (HRT) global scheduler for a multiprocessor system designed upon three novel techniques. First, we present a modeling methodology based on Timed Continuous Petri nets (TCPN) that yields a complete state variable model, including job arrivals, CPU usage, power, and thermal behavior. The model is accurate and avoids the calibration stage of RC thermal models. Second, based on this model, a linear programming problem (LPP) determines the existence of a feasible HRT thermal-aware schedule. Last, a sliding-mode controller and an online discretization algorithm implement the global HRT scheduler, which is capable of managing thermal constraints, context switching, migrations, and disturbances.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "36", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ruaro:2019:SAQ, author = "Marcelo Ruaro and Axel Jantsch and Fernando Gehm Moraes", title = "Self-Adaptive {QoS} Management of Computation and Communication Resources in Many-Core {SoCs}", journal = j-TECS, volume = "18", number = "4", pages = "37:1--37:??", month = aug, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3328755", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:43 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3328755", abstract = "Providing quality of service (QoS) for many-core systems with dynamic application admission is challenging due to the high amount of resources to manage and the unpredictability of computation and communication events. Related works propose a self-adaptive QoS mechanism concerned either in communication or computation resources, lacking, however, a comprehensive QoS management of both. Assuming a many-core system with QoS monitoring, runtime circuit-switching establishment, task migration, and a soft real-time task scheduler, this work fills this gap by proposing a novel self-adaptive QoS management. The contribution of this proposal comes with the following features in the QoS management: (i) comprehensiveness, by covering communication and computation resources; (ii) online, adopting the ODA (Observe, Decide, Act) runtime closed-loop adaptation; and (iii) reactive and proactive decisions, by using a dynamic application profile extraction technique, which enables the QoS management to be aware of the profile of running applications, allowing it to take proactive decisions based on a prediction analysis. 
The proposed QoS management adopts a decentralized organization by partitioning the system in clusters, each one managed by a dedicated processor, making the proposal scalable. Results show that the proactive feature accurately extracts the applications' profile, and can prevent future QoS violations. The synergy of reactive and proactive decisions was able to sustain QoS, reducing the deadline miss rate by 99.5\% with a severe disturbance in communication and computation levels, and avoiding deadline misses up to 70\% of system utilization.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "37", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ali:2019:CCT, author = "G. G. Md. Nawaz Ali and Md. Noor-A-Rahim and Md. Ashiqur Rahman and Beshah Ayalew and Peter H. J. Chong and Yong Liang Guan", title = "Cooperative Cache Transfer-based On-demand Network Coded Broadcast in Vehicular Networks", journal = j-TECS, volume = "18", number = "4", pages = "38:1--38:??", month = aug, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3329865", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:43 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3329865", abstract = "Real-time traffic updates, safety and comfort driving, infotainment, and so on, are some envisioned applications in vehicular networks. Unlike traditional broadcast, network-coding-assisted broadcast can satisfy multiple vehicles with different data items in a coded form. However, server side encoding requires the prior knowledge about vehicles' cache information for the successful decoding at the vehicles' sides. The explicit cache upload from vehicles to Road Side Unit (RSU) wastes upload bandwidth. 
In multi-RSU vehicular networks, we propose a Cooperative Cache Transfer-based On-demand Network Coded Broadcast called CCTCB. In the proposed CCTCB approach, vehicles do not need to upload their cache information to the server, rather the RSU server learns the vehicles' cache intrinsically. We derive a probabilistic model to analyze the coding opportunity in the proposed cooperative cache transfer mechanism incorporating vehicle mobility. The comprehensive simulation results validate the superiority of the proposed approach.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "38", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chen:2019:OIW, author = "Yu-Chieh Chen and Ching-Chih Chang and Ramesh Perumal and Shih-Rung Yeh and Yen-Chung Chang and Hsin Chen", title = "Optimization and Implementation of Wavelet-based Algorithms for Detecting High-voltage Spindles in Neuron Signals", journal = j-TECS, volume = "18", number = "5", pages = "39:1--39:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3329864", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3329864", abstract = "This article presents a microcontroller unit (MCU) based simplified discrete wavelet transform (Sim-DWT) algorithm that can detect high-voltage spindles (HVSs) in local field potential (LFP) signals. The Sim-DWT algorithm operates in an 8-bit MCU, 8 MHz operating clock and 16 sample points of buffers to detect HVSs with a frequency range of 5--15 Hz. The requirement of only sixteen 8-bit sample points as the window length for calculation and no need for a multiplier render the Sim-DWT easy to implement in an MCU with limited hardware resources.
The Sim-DWT is applied in an 8-bit MCU with 6 mW power consumption (including IO ports) and was tested for detecting LFP signals in vivo. The design methods and the accuracy of three typical types of mother wavelet functions (Haar, DB4, Morlet) in the Sim-DWT were also tested and compared with those of a PC-based system. The experimental results showed that with appropriately designed cMW functions in the Sim-DWT, HVSs could be detected more accurately than they could be in PC-based software. The present study indicates that the optimized HVS detector (Sim-DWT) can be implemented in an 8-bit MCU with limited hardware resources and is suitable to serve as the digital core in a closed-loop deep brain stimulator microsystem in the future.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "39", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Noltsis:2019:CLC, author = "Michail Noltsis and Nikolaos Zambelis and Francky Catthoor and Dimitrios Soudris", title = "A Closed-Loop Controller to Ensure Performance and Temperature Constraints for Dynamic Applications", journal = j-TECS, volume = "18", number = "5", pages = "40:1--40:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3343030", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3343030", abstract = "To secure correct system operation, a plethora of Reliability, Availability and Serviceability (RAS) techniques have been deployed by circuit designers. RAS mechanisms, however, come with the cost of extra clock cycles. In addition, a wide variety of dynamic workloads and different input conditions often constitute preemptive dependability techniques hard to implement.
To this end, we focus on a realistic case study of a closed-loop controller that mitigates performance variation with a reactive response. This concept has been discussed but was only illustrated on small benchmarks. In particular, the extension of the approach to manage performance of dynamic workloads on a target platform has not been shown earlier. We compare our scheme against the version of a Linux CPU frequency governor in terms of timing response and energy consumption. Finally, we move forward and suggest a new flavor of our controller to efficiently manage processor temperature. Again, the concept is illustrated with a realistic case study and compared to a modern temperature manager.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "40", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Durrieu:2019:GAC, author = "Guy Durrieu and Claire Pagetti", title = "{GRec}: Automatic Computation of Reconfiguration Graphs for Multi-core Platforms", journal = j-TECS, volume = "18", number = "5", pages = "41:1--41:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3350533", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3350533", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "41", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhu:2019:SEA, author = "Siwen Zhu and Yi Tang and Junxiang Zheng and Yongzhi Cao and Hanpin Wang and Yu Huang and Marian Margraf", title = "Sample Essentiality and Its Application to Modeling Attacks on Arbiter {PUFs}", journal = j-TECS, volume = "18", number = "5", pages = "42:1--42:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3344148", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3344148", abstract = "Physically Unclonable Functions (PUFs), as an alternative hardware-based security method, have been challenged by some modeling attacks. As is known to all, samples are significant in modeling attacks on PUFs, and thus, some efforts have been made to expand sample sets therein to improve modeling attacks. A closer examination, however, reveals that not all samples contribute to modeling attacks equally. Therefore, in this article, we introduce the concept of sample essentiality for describing the contribution of a sample in modeling attacks and point out that any sample without sample essentiality cannot enhance some modeling attacks on PUFs. As a by-product, we find theoretically and empirically that the samples expanded by the procedures proposed by Chatterjee et al. do not satisfy our sample essentiality. Furthermore, we propose the notion of essential sample sets for datasets and discuss its basic properties. Finally, we demonstrate that our results about sample essentiality can be used to reduce samples efficiently and benefit sample selection in modeling attacks on arbiter PUFs.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "42", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Strobel:2019:PMA, author = "Manuel Strobel and Martin Radetzki", title = "Power-mode-aware Memory Subsystem Optimization for Low-power System-on-Chip Design", journal = j-TECS, volume = "18", number = "5", pages = "43:1--43:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3356583", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3356583", abstract = "The memory subsystem is increasingly subject to an intensive energy minimization effort in embedded and System-on-Chip development. While the main focus is typically put on energy consumption reduction, there are other optimization aspects that become more and more relevant as well, e.g., peak power constraints or time budgets. In this regard, the present article makes the following contributions. Taking industrial-grade information into account, different Static Random-Access Memory (SRAM) power modes and their characteristics are presented at first. Using this information, a comprehensive optimization model with the main intention of energy minimization is defined. It is based on memory access statistics that represent the embedded software of interest, which allows for application-tailored improvements. Further, it considers different power states of the memory subsystem and enables the definition of peak power and time corridor constraints. The presented two-stage implementation of this optimization model allows the handling of large design spaces. 
Clearly defined interfaces facilitate the exchange of individual workflow parts in a plug-and-play fashion and further enable a neat integration of our optimization method with existing hardware/software (HW/SW) codesign synthesis flows. A general evaluation for different technology nodes yields that the optimization potential of memory low-power modes increases with advancing miniaturization but also depends on the data footprint of the embedded software. Experimental results for a set of benchmark applications confirm these findings and provide energy savings of up to 90\% and over 60\% on average compared to a monolithic memory layout without low-power modes.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "43", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Motamedi:2019:DNA, author = "Mohammad Motamedi and Felix A. Portillo and Daniel Fong and Soheil Ghiasi", title = "{Distill-Net}: Application-Specific Distillation of Deep Convolutional Neural Networks for Resource-Constrained {IoT} Platforms", journal = j-TECS, volume = "18", number = "5", pages = "44:1--44:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3360512", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3360512", abstract = "Many Internet-of-Things (IoT) applications demand fast and accurate understanding of a few key events in their surrounding environment. Deep Convolutional Neural Networks (CNNs) have emerged as an effective approach to understand speech, images, and similar high-dimensional data types. 
Algorithmic performance of modern CNNs, however, fundamentally relies on learning class-agnostic hierarchical features that only exist in comprehensive training datasets with many classes. As a result, fast inference using CNNs trained on such datasets is prohibitive for most resource-constrained IoT platforms. To bridge this gap, we present a principled and practical methodology for distilling a complex modern CNN that is trained to effectively recognize many different classes of input data into an application-dependent essential core that not only recognizes the few classes of interest to the application accurately but also runs efficiently on platforms with limited resources. Experimental results confirm that our approach strikes a favorable balance between classification accuracy (application constraint), inference efficiency (platform constraint), and productive development of new applications (business constraint).", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "44", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhou:2019:RTA, author = "Quan Zhou and Guohui Li and Jianjun Li and Chenggang Deng and Ling Yuan", title = "Response Time Analysis for Tasks with Fixed Preemption Points under Global Scheduling", journal = j-TECS, volume = "18", number = "5", pages = "111:1--111:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3360513", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3360513", abstract = "As an effective method for detecting the schedulability of real-time tasks on multiprocessor platforms, Response time analysis (RTA) has been deeply researched in recent decades. 
Most of the existing RTA methods are designed for tasks that can be preempted at any time. However, in some real-time systems, a task may have some fixed preemption points (FPPs) that divide its execution into a series of non-preemptive regions (NPRs). In such environments, the task can only be preempted at its FPPs, which makes existing RTA methods for arbitrary preemption tasks not applicable. In this article, we study the schedulability analysis on tasks with FPPs under both global fixed-priority (G-FP) scheduling and global earliest deadline first (G-EDF) scheduling. First, based on the idea of limiting the time interval between two consecutive executions of an NPR, a novel RTA method for tasks with FPPs under G-FP scheduling is proposed. Second, we propose an effective RTA method for tasks with FPPs under G-EDF scheduling. Finally, extensive simulations are conducted and the results validate the effectiveness of the proposed methods.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "111", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Yu:2019:TND, author = "Jiecao Yu and Andrew Lukefahr and Reetuparna Das and Scott Mahlke", title = "{TF-Net}: Deploying Sub-Byte Deep Neural Networks on Microcontrollers", journal = j-TECS, volume = "18", number = "5s", pages = "45:1--45:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358189", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358189", abstract = "Deep Neural Networks (DNNs) have become an essential component of various applications. 
While today's DNNs are mainly restricted to cloud services, network connectivity, energy, and data privacy problems make it important to support efficient DNN computation on low-cost, low-power processors like microcontrollers. However, due to the constrained computation resources, it is challenging to execute large DNN models on microcontrollers. Using sub-byte low-precision input activations and weights is a typical method to reduce DNN computation. But on byte-addressable microcontrollers, the sub-byte computation is not well supported. The sub-byte inputs and weights need to be unpacked from bitstreams before computation, which incurs significant computation and energy overhead. In this paper, we propose the TF-Net pipeline to efficiently deploy sub-byte DNNs on microcontrollers. While TF-Net allows for a range of weight and input precision, we find Ternary weights and Four-bit inputs provide the optimal balance between model accuracy, computation performance, and energy efficiency. TF-Net first includes a training framework for sub-byte low-precision DNN models. Two algorithms are then introduced to accelerate the trained models. The first, direct buffer convolution, amortizes unpacking overhead by caching unpacked inputs. The second, packed sub-byte multiply-accumulate, utilizes a single multiplication instruction to perform multiple sub-byte multiply-accumulate computations. To further accelerate DNN computation, we propose two instructions, Multiply-Shift-Accumulate and Unpack, to extend the existing microcontroller instruction set. On the tested networks, TF-Net can help improve the computation performance and energy efficiency by $ 1.83 \times $ and $ 2.28 \times $ on average, respectively.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "45", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Goncalves:2019:AER, author = "Larissa Rozales Gon{\c{c}}alves and Rafael F{\~a}o {De Moura} and Luigi Carro", title = "Aggressive Energy Reduction for Video Inference with Software-only Strategies", journal = j-TECS, volume = "18", number = "5s", pages = "46:1--46:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358174", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358174", abstract = "In the past years, several works have proposed custom hardware and software-based techniques for the acceleration of Convolutional Neural Networks (CNNs). Most of these works focus on saving computations by changing the used precision or modifying frame processing. To reach a more aggressive energy reduction, in this paper we propose software-only modifications to the CNNs inference process. Our approach exploits the inherent locality in videos by replacing entire frame computations with a movement prediction algorithm. Furthermore, when a frame must be processed, we avoid energy-demanding floating-point operations, and at the same time reduce memory accesses by employing look-up tables in place of the original convolutions. Using the proposed approach, one can reach significant energy gains of more than $ 25 \times $ for security cameras, and $ 12 \times $ for moving vehicles applications, with only small software modifications.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "46", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhang:2019:CCL, author = "Jeff (Jun) Zhang and Parul Raj and Shuayb Zarar and Amol Ambardekar and Siddharth Garg", title = "{CompAct}: On-chip Compression of Activations for Low Power Systolic Array Based {CNN} Acceleration", journal = j-TECS, volume = "18", number = "5s", pages = "47:1--47:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358178", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/datacompression.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358178", abstract = "This paper addresses the design of systolic array (SA) based convolutional neural network (CNN) accelerators for mobile and embedded domains. On- and off-chip memory accesses to the large activation inputs (sometimes called feature maps) of CNN layers contribute significantly to total energy consumption for such accelerators; while prior work has proposed off-chip compression, activations are still stored on-chip in uncompressed form, requiring either large on-chip activation buffers or slow and energy-hungry off-chip accesses. In this paper, we propose CompAct, a new architecture that enables on-chip compression of activations for SA based CNN accelerators. CompAct is built around several key ideas. First, CompAct identifies an SA schedule that has nearly regular access patterns, enabling the use of a modified run-length coding scheme (RLC). Second, CompAct improves compression ratio of the RLC scheme using Sparse-RLC in later CNN layers and Lossy-RLC in earlier layers. Finally, CompAct proposes look-ahead snoozing that operates synergistically with RLC to reduce the leakage energy of activation buffers.
Based on detailed synthesis results, we show that CompAct enables up to 62\% reduction in activation buffer energy, and 34\% reduction in total chip energy.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "47", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Castro-Godinez:2019:EBE, author = "Jorge Castro-God{\'\i}nez and Muhammad Shafique and J{\"o}rg Henkel", title = "{ECAx}: Balancing Error Correction Costs in Approximate Accelerators", journal = j-TECS, volume = "18", number = "5s", pages = "48:1--48:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358179", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358179", abstract = "Approximate computing has emerged as a design paradigm amenable to error-tolerant applications. It enables trading the quality of results for efficiency improvement in terms of delay, power, and energy consumption under user-provided tolerable quality degradation. Approximate accelerators have been proposed to expedite frequently executing code sections of error-resilient applications while meeting a defined quality level. However, these accelerators may produce unacceptable errors at run time if the input data changes or dynamic adjustments are made for a defined output quality constraint. State-of-the-art approaches in approximate computing address this issue by correctly re-computing those accelerator invocations that produce unacceptable errors; this is achieved by using the host processor or an alternate exact accelerator, which is activated on-demand. 
Nevertheless, such approaches can nullify the benefits of approximate computing, especially when input data variations are high at run time and errors due to approximations are above a tolerable threshold. As a robust and general solution to this problem, we propose ECAx, a novel methodology to explore low-overhead error correction in approximate accelerators by selectively correcting most significant errors, in terms of their magnitude, without losing the gains of approximations. We particularly consider the case of approximate accelerators built with approximate functional units such as approximate adders. Our novel methodology reduces the required exact re-computations on the host processor, achieving up to 20\% performance gain compared to state-of-the-art approaches.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "48", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Bhat:2019:ULE, author = "Ganapati Bhat and Yigit Tuncel and Sizhe An and Hyung Gyu Lee and Umit Y. Ogras", title = "An Ultra-Low Energy Human Activity Recognition Accelerator for Wearable Health Applications", journal = j-TECS, volume = "18", number = "5s", pages = "49:1--49:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358175", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358175", abstract = "Human activity recognition (HAR) has recently received significant attention due to its wide range of applications in health and activity monitoring. The nature of these applications requires mobile or wearable devices with limited battery capacity. User surveys show that charging requirement is one of the leading reasons for abandoning these devices. 
Hence, practical solutions must offer ultra-low power capabilities that enable operation on harvested energy. To address this need, we present the first fully integrated custom hardware accelerator (HAR engine) that consumes 22.4 $ \mu $J per operation using a commercial 65 nm technology. We present a complete solution that integrates all steps of HAR, i.e., reading the raw sensor data, generating features, and activity classification using a deep neural network (DNN). It achieves 95\% accuracy in recognizing 8 common human activities while providing three orders of magnitude higher energy efficiency compared to existing solutions.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "49", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wijerathne:2019:CHT, author = "Dhananjaya Wijerathne and Zhaoying Li and Manupa Karunarathne and Anuj Pathania and Tulika Mitra", title = "{CASCADE}: High Throughput Data Streaming via Decoupled Access-Execute {CGRA}", journal = j-TECS, volume = "18", number = "5s", pages = "50:1--50:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358177", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358177", abstract = "A Coarse-Grained Reconfigurable Array (CGRA) is a promising high-performance low-power accelerator for compute-intensive loop kernels. While the mapping of the computations on the CGRA is a well-studied problem, bringing the data into the array at a high throughput remains a challenge. A conventional CGRA design involves on-array computations to generate memory addresses for data access undermining the attainable throughput. 
A decoupled access-execute architecture, on the other hand, isolates the memory access from the actual computations resulting in a significantly higher throughput. We propose a novel decoupled access-execute CGRA design called CASCADE with full architecture and compiler support for high-throughput data streaming from an on-chip multi-bank memory. CASCADE offloads the address computations for the multi-bank data memory access to a custom designed programmable hardware. An end-to-end fully-automated compiler synchronizes the conflict-free movement of data between the memory banks and the CGRA. Experimental evaluations show on average $ 3 \times $ performance benefit and $ 2.2 \times $ performance per watt improvement for CASCADE compared to an iso-area conventional CGRA with a bigger processing array in lieu of a dedicated hardware memory address generation logic.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "50", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Restuccia:2019:YBA, author = "Francesco Restuccia and Marco Pagani and Alessandro Biondi and Mauro Marinoni and Giorgio Buttazzo", title = "Is Your Bus Arbiter Really Fair? {Restoring} Fairness in {AXI} Interconnects for {FPGA SoCs}", journal = j-TECS, volume = "18", number = "5s", pages = "51:1--51:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358183", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358183", abstract = "AMBA AXI is a popular bus protocol that is widely adopted as the medium to exchange data in field-programmable gate array system-on-chips (FPGA SoCs). 
The AXI protocol does not specify how conflicting transactions are arbitrated and hence the design of bus arbiters is left to the vendors that adopt AXI. Typically, a round-robin arbitration is implemented to ensure a fair access to the bus by the master nodes, as for the popular SoCs by Xilinx. This paper addresses a critical issue that can arise when adopting the AXI protocol under round-robin arbitration; specifically, in the presence of bus transactions with heterogeneous burst sizes. First, it is shown that a completely unfair bandwidth distribution can be achieved under some configurations, making it possible to arbitrarily decrease the bus bandwidth of a target master node. This issue poses serious performance, safety, and security concerns. Second, a low-latency (one clock cycle) module named AXI burst equalizer (ABE) is proposed to restore fairness. Our investigations and proposals are supported by implementations and tests upon three modern SoCs. Experimental results are reported to confirm the existence of the issue and assess the effectiveness of the ABE with bus traffic generators and hardware accelerators from Xilinx's IP library.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "51", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mandal:2019:APM, author = "Sumit K. Mandal and Raid Ayoub and Michael Kishinevsky and Umit Y.
Ogras", title = "Analytical Performance Models for {NoCs} with Multiple Priority Traffic Classes", journal = j-TECS, volume = "18", number = "5s", pages = "52:1--52:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358176", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358176", abstract = "Networks-on-chip (NoCs) have become the standard for interconnect solutions in industrial designs ranging from client CPUs to many-core chip-multiprocessors. Since NoCs play a vital role in system performance and power consumption, pre-silicon evaluation environments include cycle-accurate NoC simulators. Long simulations increase the execution time of evaluation frameworks, which are already notoriously slow, and prohibit design-space exploration. Existing analytical NoC models, which assume fair arbitration, cannot replace these simulations since industrial NoCs typically employ priority schedulers and multiple priority classes. To address this limitation, we propose a systematic approach to construct priority-aware analytical performance models using micro-architecture specifications and input traffic. Our approach decomposes the given NoC into individual queues with modified service time to enable accurate and scalable latency computations. Specifically, we introduce novel transformations along with an algorithm that iteratively applies these transformations to decompose the queuing system. Experimental evaluations using real architectures and applications show high accuracy of 97\% and up to $ 2.5 \times $ speedup in full-system simulation.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "52", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Song:2019:EEP, author = "Shihao Song and Anup Das and Onur Mutlu and Nagarajan Kandasamy", title = "Enabling and Exploiting Partition-Level Parallelism {(PALP)} in Phase Change Memories", journal = j-TECS, volume = "18", number = "5s", pages = "53:1--53:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358180", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358180", abstract = "Phase-change memory (PCM) devices have multiple banks to serve memory requests in parallel. Unfortunately, if two requests go to the same bank, they have to be served one after another, leading to lower system performance. We observe that a modern PCM bank is implemented as a collection of partitions that operate mostly independently while sharing a few global peripheral structures, which include the sense amplifiers (to read) and the write drivers (to write). Based on this observation, we propose PALP, a new mechanism that enables partition-level parallelism within each PCM bank, and exploits such parallelism by using the memory controller's access scheduling decisions. PALP consists of three new contributions. First, we introduce new PCM commands to enable parallelism in a bank's partitions in order to resolve the read-write bank conflicts, with no changes needed to PCM logic or its interface. Second, we propose simple circuit modifications that introduce a new operating mode for the write drivers, in addition to their default mode of serving write requests. When configured in this new mode, the write drivers can resolve the read-read bank conflicts, working jointly with the sense amplifiers. 
Finally, we propose a new access scheduling mechanism in PCM that improves performance by prioritizing those requests that exploit partition-level parallelism over other requests, including the long outstanding ones. While doing so, the memory controller also guarantees starvation-freedom and the PCM's running-average-power-limit (RAPL). We evaluate PALP with workloads from the MiBench and SPEC CPU2017 Benchmark suites. Our results show that PALP reduces average PCM access latency by 23\%, and improves average system performance by 28\% compared to the state-of-the-art approaches.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "53", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Sridhar:2019:SEC, author = "Aditya Sridhar and Mohamed Ibrahim and Krishnendu Chakrabarty", title = "Synterface: Efficient Chip-to-World Interfacing for Flow-Based Microfluidic Biochips Using Pin-Count Minimization", journal = j-TECS, volume = "18", number = "5s", pages = "54:1--54:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358188", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358188", abstract = "Flow-based microfluidic biochips can be used to perform bioassays by manipulating a large number of on-chip valves. These biochips are increasingly used today for biomolecular recognition, single-cell screening, and point-of-care disease diagnostics, and design-automation solutions for flow-based microfluidics enable the mapping and optimization of biomolecular protocols and software-based valve control.
However, a key problem that has not received adequate attention is chip-to-world interfacing, which requires the use of off-chip control equipment to provide control signals for the on-chip valves. This problem is exacerbated by the increase in the number of valves as chips get more complex. To address the interfacing problem, we present an efficient pin-count minimization (synthesis) problem, referred to as Synterface, which uses on-chip microfluidic logic gates and optimization based on concepts from linear algebra. We present results to show that Synterface significantly reduces pin-count and simplifies the external interface for flow-based microfluidics.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "54", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kim:2019:OBI, author = "Minsu Kim and Jeong-Keun Park and Sungyeol Kim and Insu Yang and Hyunsoo Jung and Soo-Mook Moon", title = "Output-based Intermediate Representation for Translation of Test-pattern Program", journal = j-TECS, volume = "18", number = "5s", pages = "55:1--55:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358186", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358186", abstract = "An Intermediate Representation (IR) used by compilers is normally generated statically, as a result of parsing or analyzing the source program. This paper proposes a completely different type of IR, generated as a result of running the source program, the output-based IR. There is a practical translation problem where such an IR is useful, in the domain of test-pattern programs. Test-pattern programs run on ATE (automatic test equipment), a special embedded system to test semiconductors such as DRAMs. 
They generate a pattern for each clock, a bit vector input to the pins of the chip. One issue is that different ATEs require different programming since each ATE manufacturer has its own programming language. Nonetheless, we should be able to test a memory chip on different ATEs as long as they generate the same patterns with the same speed. Therefore, a memory chipmaker wants to make a pattern program portable across ATEs, to fully utilize their ATE resources. One solution is translating between pattern programs, for which we need an IR since there are multiple source ATEs and target ATEs. Instead of a conventional, static IR, we propose using the output pattern itself as an IR. Since the pattern is independent of ATEs and easily obtainable, the output-based IR obviates designing a static IR considering all ATE programming languages and hardware differences. Moreover, we might synthesize a better target program from the IR, more optimized to the target ATE. However, the full pattern generated by a product-level pattern program is huge, so we propose using an IR of abbreviated patterns, annotated with the repetition information obtained while executing the source program. Our experimental results with product-level pattern programs show that our approach is feasible.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "55", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Silva:2019:RFG, author = "Lucas Bragan{\c{c}}a {Da Silva} and Ricardo Ferreira and Michael Canesche and Marcelo M. Menezes and Maria D. Vieira and Jeronimo Penha and Peter Jamieson and Jos{\'e} Augusto M. 
Nacif", title = "{READY}: a Fine-Grained Multithreading Overlay Framework for Modern {CPU--FPGA} Dataflow Applications", journal = j-TECS, volume = "18", number = "5s", pages = "56:1--56:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358187", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358187", abstract = "In this work, we propose a framework called REconfigurable Accelerator DeploY (READY), the first framework to support polynomial runtime mapping of dataflow applications in high-performance CPU-FPGA platforms. READY introduces an efficient mapping with fine-grained multithreading onto an overlay architecture that hides the latency of a global interconnection network. In addition to our overlay architecture, we show how this system helps solve some of the challenges for FPGA cloud computing adoption in high-performance computing. The framework encapsulates dataflow descriptions by using a target independent, high-level API, and a dataflow model that allows for explicit spatial and temporal parallelism. READY directly maps the dataflow kernels onto the accelerator. Our tool is flexible and extensible and provides the infrastructure to explore different accelerator designs. We validate READY on the Intel Harp platform, and our experimental results show an average $ 2 \times $ execution runtime improvement when compared to an 8-thread multi-core processor.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "56", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Park:2019:MOE, author = "Sunghyun Park and Youfeng Wu and Janghaeng Lee and Amir Aupov and Scott Mahlke", title = "Multi-objective Exploration for Practical Optimization Decisions in Binary Translation", journal = j-TECS, volume = "18", number = "5s", pages = "57:1--57:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358185", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358185", abstract = "In the design of mobile systems, hardware/software (HW/SW) co-design has important advantages by creating specialized hardware for the performance or power optimizations. Dynamic binary translation (DBT) is a key component in co-design. During the translation, a dynamic optimizer in the DBT system applies various software optimizations to improve the quality of the translated code. With dynamic optimization, optimization time is an exposed run-time overhead and useful analyses are often restricted due to their high costs. Thus, a dynamic optimizer needs to make smart decisions with limited analysis information, which complicates the design of optimization decision models and often causes failures in human-made heuristics. In mobile systems, this problem is even more challenging because of strict constraints on computing capabilities and memory size. To overcome the challenge, we investigate an opportunity to build practical optimization decision models for DBT by using machine learning techniques. As the first step, loop unrolling is chosen as the representative optimization. 
We base our approach on the industrial strength DBT infrastructure and conduct evaluation with 17,116 unrollable loops collected from 200 benchmarks and real-life programs across various domains. By utilizing all available features that are potentially important for loop unrolling decision, we identify the best classification algorithm for our infrastructure with consideration for both prediction accuracy and cost. The greedy feature selection algorithm is then applied to the classification algorithm to distinguish its significant features and cut down the feature space. By maintaining significant features only, the best affordable classifier, which satisfies the budgets allocated to the decision process, shows 74.5\% of prediction accuracy for the optimal unroll factor and realizes an average 20.9\% reduction in dynamic instruction count during the steady-state translated code execution. For comparison, the best baseline heuristic achieves 46.0\% prediction accuracy with an average 13.6\% instruction count reduction. Given that the infrastructure is already highly optimized and the ideal upper bound for instruction reduction is observed at 23.8\%, we believe this result is noteworthy.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "57", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Durr:2019:EET, author = "Marco D{\"u}rr and Georg {Von Der Br{\"u}ggen} and Kuan-Hsun Chen and Jian-Jia Chen", title = "End-to-End Timing Analysis of Sporadic Cause-Effect Chains in Distributed Systems", journal = j-TECS, volume = "18", number = "5s", pages = "58:1--58:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358181", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358181", abstract = "A cause-effect chain is used to define the logical order of data dependent tasks, which is independent from the execution order of the jobs of the (periodic/sporadic) tasks. Analyzing the worst-case End-to-End timing behavior, associated to a cause-effect chain, is an important problem in embedded control systems. For example, the detailed timing properties of modern automotive systems are specified in the AUTOSAR Timing Extensions. In this paper, we present a formal End-to-End timing analysis for distributed systems. We consider the two most important End-to-End timing semantics, i.e., the button-to-action delay (termed as the maximum reaction time) and the worst-case data freshness (termed as the maximum data age). Our contribution is significant due to the consideration of the sporadic behavior of job activations, whilst the results in the literature have been mostly limited to periodic activations. The proof strategy shows the (previously unexplored) connection between the reaction time (data age, respectively) and immediate forward (backward, respectively) job chains. 
Our analytical results dominate the state of the art for sporadic task activations in distributed systems and the evaluations show a clear improvement for synthesized task systems as well as for a real world automotive benchmark setting.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "58", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Leipnitz:2019:HLS, author = "Marcos T. Leipnitz and Gabriel L. Nazar", title = "High-Level Synthesis of Approximate Designs under Real-Time Constraints", journal = j-TECS, volume = "18", number = "5s", pages = "59:1--59:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358182", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358182", abstract = "The adoption of High-Level Synthesis (HLS) has increased as the latest HLS tools have evolved to provide high-quality results while improving productivity and time-to-market. Concurrently, many works have been proposing the incorporation of approximate computing techniques within HLS toolchains, allowing automated generation of inexact circuits for error-tolerant application domains with the aim of trading-off computation accuracy with area/power savings or performance improvements. Thus, when attempting to make a design meet timing requirements, designers of real-time systems using HLS may resort to approximation approaches. However, current approximate HLS tools do not allow specifying real-time constraints, being instead error-constrained to explore area, power, or performance optimizations. In this work, we propose an approximate HLS framework for real-time systems that can be integrated with state-of-the-art HLS tools. 
With this framework designers can specify real-time constraints and satisfy them while minimizing the output error. It uses scheduling information and Worst-Case Execution Time (WCET) analysis for iteratively exploring time-error trade-offs of approximations in the time-critical execution path. Experimental results on signal and image processing benchmarks show that we can reduce the WCET of exact designs by up to 35\% with acceptable quality degradation.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "59", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Saeed:2019:LDB, author = "Samah Mohamed Saeed and Robert Wille and Ramesh Karri", title = "Locking the Design of Building Blocks for Quantum Circuits", journal = j-TECS, volume = "18", number = "5s", pages = "60:1--60:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358184", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358184", abstract = "The research community expects that quantum computers will give economical results for particular problems on which the classical computers break down. Examples include factoring of large numbers, searching in a big database, or simulating chemical reactions to design new drugs. Attempts are ongoing to build up a practical quantum computer. Users (clients) can implement quantum circuits to run on these quantum computers. However, before running the quantum circuit on the quantum computer, the users (clients) should compile, optimize, decompose, and technology map the quantum circuit. In the current embodiment, the resulting quantum circuit runs on a remote and untrusted quantum computer server --- introducing security risks. 
This study explores the risk of outsourcing the quantum circuit to the quantum computer by focusing on quantum oracles. Quantum oracles are pivotal building blocks and require specialized expertise and means to design. Hence, the designer may protect this proprietary quantum oracle intellectual property (IP) and hide his/her private information. We investigate how to manage that on a quantum computer server using the IBM project QX quantum computer and Qiskit tools as an exemplar.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "60", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mohanty:2019:SPE, author = "Ram Prasad Mohanty and Hasindu Gamaarachchi and Andrew Lambert and Sri Parameswaran", title = "{SWARAM}: Portable Energy and Cost Efficient Embedded System for Genomic Processing", journal = j-TECS, volume = "18", number = "5s", pages = "61:1--61:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358211", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358211", abstract = "Treatment of patients using high-quality precision medicine requires a thorough understanding of the genetic composition of a patient. Ideally, the identification of unique variations in an individual's genome is needed for specifying the necessary treatment. Variant calling workflow is a pipeline of tools, integrating state of the art software systems aimed at alignment, sorting and variant calling for the whole genome sequencing (WGS) data. This pipeline is utilized for identifying unique variations in an individual's genome (compared to a reference genome). Currently, such a workflow is implemented on high-performance computers (with additional GPUs or FPGAs) or in cloud computers. 
Such systems are large, have a high cost, and rely on the internet for genome data transfer which makes the system unusable in remote locations unequipped with internet connectivity. It further raises privacy concerns due to processing being carried out in a different facility. To overcome such limitations, in this paper, for the first time, we present a cost-efficient, offline, scalable, portable, and energy-efficient computing system named SWARAM for variant calling workflow processing. The system uses novel architecture and algorithms to match against partial reference genomes to exploit smaller memory sizes which are typically available in tiny processing systems. Extensive tests on a standard benchmark data-set (NA12878 Illumina platinum genome) confirm that the time consumed for the data transfer and completing variant calling workflow on SWARAM was competitive with that of a 32-core Intel Xeon server with similar accuracy, but costs less than a fifth, and consumes less than 40\% of the energy of the server system. The original scripts and code we developed for executing the variant calling workflow on SWARAM are available in the associated GitHub repository https://github.com/Rammohanty/swaram.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "61", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kim:2019:AAI, author = "Jihye Kim and Jiwon Lee and Hankyung Ko and Donghwan Oh and Semin Han and Gwonho Jeong and Hyunok Oh", title = "{AuthCropper}: Authenticated Image Cropper for Privacy Preserving Surveillance Systems", journal = j-TECS, volume = "18", number = "5s", pages = "62:1--62:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358195", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358195", abstract = "As surveillance systems are popular, the privacy of the recorded video becomes more important. On the other hand, the authenticity of video images should be guaranteed when used as evidence in court. It is challenging to satisfy both (personal) privacy and authenticity of a video simultaneously, since the privacy requires modifications (e.g., partial deletions) of an original video image while the authenticity does not allow any modifications of the original image. This paper proposes a novel method to convert an encryption scheme to support partial decryption with a constant number of keys and construct a privacy-aware authentication scheme by combining with a signature scheme. The security of our proposed scheme is implied by the security of the underlying encryption and signature schemes. Experimental results show that the proposed scheme can handle the UHD video stream with more than 17 fps on a real embedded system, which validates the practicality of the proposed scheme.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "62", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Fong:2019:ODS, author = "Daniel D. Fong and Vivek J. Srinivasan and Kourosh Vali and Soheil Ghiasi", title = "Optode Design Space Exploration for Clinically-robust Non-invasive Fetal Oximetry", journal = j-TECS, volume = "18", number = "5s", pages = "63:1--63:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358207", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358207", abstract = "Non-invasive transabdominal fetal oximetry (TFO) has the potential to improve delivery outcomes by providing physicians with an objective metric of fetal well-being during labor. Fundamentally, the technology is based on sending light through the maternal abdomen to investigate deep fetal tissue, followed by detection and processing of the light that returns (via scattering) to the outside of the maternal abdomen. The placement of the photodetector in relation to the light source critically impacts TFO system performance, including its operational robustness in the face of fetal depth variation. However, anatomical differences between pregnant women cause the fetal depths to vary drastically, which further complicates the optical probe (optode) design optimization. In this paper, we present a methodology to solve this problem. We frame optode design space exploration as a multi-objective optimization problem, where hardware complexity (cost) and performance across a wider patient population (robustness) form competing objectives. We propose a model-based approach to characterize the Pareto-optimal points in the optode design space, through which a specific design is selected. 
Experimental evaluation via simulation and in vivo measurement on pregnant sheep support the efficacy of our approach.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "63", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Siddhu:2019:PLA, author = "Lokesh Siddhu and Preeti Ranjan Panda", title = "{PredictNcool}: Leakage Aware Thermal Management for {$3$D} Memories Using a Lightweight Temperature Predictor", journal = j-TECS, volume = "18", number = "5s", pages = "64:1--64:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358208", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358208", abstract = "Recent research on mitigating thermal problems in 3D memories has covered reactive strategies that reduce memory power consumption, and thereby, performance, when the memory temperature reaches the maximum operating limit. Such techniques could benefit from temperature prediction and avoid unnecessary invocations and state transitions of the thermal management strategy. We develop an accurate steady state temperature predictor for thermal management of 3D memories. We utilize the symmetries in the floorplan, along with other design insights, to reduce the predictor's model parameters, making it lightweight and suitable for runtime thermal management. Using the temperature prediction, we introduce PredictNcool, a proactive thermal management strategy to reduce application runtime and memory energy. 
We compare PredictNcool with two recent thermal management strategies and our experiments show that the proposed optimization results in performance improvements of 28\% and 5\%, and memory subsystem energy reductions of 38\% and 12\% (on average).",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "64",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ma:2019:RFD,
  author =       "Chenlin Ma and Zhaoyan Shen and Lei Han and Zili Shao",
  title =        "{RMW-F}: a Design of {RMW-Free} Cache Using Built-in {NAND-Flash} for {SMR} Storage",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "65:1--65:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358210",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358210",
  abstract =     "Shingled Magnetic Recording (SMR) disks have been proposed as a high-density, non-volatile media and precede traditional hard disk drives in both storing capacity and cost. However, the intrinsic characteristics of SMR disks raise a major performance challenge named read-modify-write operations (RMWs) that are time-consuming and can significantly degrade the overall system performance. Current designs of SMR disks usually adopt a persistent cache to alleviate the negative effect brought by RMWs and the cache is used as a first-level cache to buffer all the incoming writes of the whole SMR storage system. In this paper, we propose to change the functionality of the cache, that is, the cache will no longer serve as a first-level cache like previous.
Incoming data are distinguished according to their different write-back behavior and those data which will incur RMWs will be left in our built-in NAND flash cache called RMW-free Cache (RMW-F) to eliminate the need of RMWs. Besides, RMW-F improves the cleaning efficiency by a model that takes both write-back cost and data popularity into considerations. Our experimental results show that RMW-F can achieve both system performance and cleaning efficiency improvements.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "65",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Liang:2019:ESW,
  author =       "Yu-Pei Liang and Tseng-Yi Chen and Yuan-Hao Chang and Shuo-Han Chen and Kam-Yiu Lam and Wei-Hsin Li and Wei-Kuan Shih",
  title =        "Enabling Sequential-write-constrained {B+}-tree Index Scheme to Upgrade Shingled Magnetic Recording Storage Performance",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "66:1--66:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358201",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358201",
  abstract =     "When a shingle magnetic recording (SMR) drive has been widely applied to modern computer systems (e.g., archive file systems, big data computing systems, and large-scale database systems), storage system developers should thoroughly review whether current designs (e.g., index schemes and data placements) are appropriate for an SMR drive because of its sequential write constraint.
Through many prior works excellently manage data in an SMR drive by integrating their proposed solutions into the driver layer, an index scheme over an SMR drive has never been optimized by any previous works because managing index over the SMR drive needs to jointly consider the properties of B$^+$-tree and SMR natures (e.g., sequential write constraint and zone partitions) in a host storage system. Moreover, poor index management will result in terrible storage performance because an index manager is extensively used in file systems and database applications. For optimizing the B$^+$-tree index structure over an SMR storage, this work identifies performance overheads caused by the B$^+$-tree index structure in an SMR drive. By such observation, this study proposes a sequential-write-constrained B$^+$-tree index scheme, namely SW-B$^+$ tree, which consists of an address redirection data structure, an SMR-aware node allocation mechanism, and a frequency-aware garbage collection strategy. According to our experiments, the SW-B$^+$ tree can improve the SMR storage performance 55\% on average.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "66",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Jiang:2019:ASL,
  author =       "Weiwen Jiang and Edwin H.-M.
Sha and Xinyi Zhang and Lei Yang and Qingfeng Zhuge and Yiyu Shi and Jingtong Hu",
  title =        "Achieving Super-Linear Speedup across Multi-{FPGA} for Real-Time {DNN} Inference",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "67:1--67:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358192",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358192",
  abstract =     "Real-time Deep Neural Network (DNN) inference with low-latency requirement has become increasingly important for numerous applications in both cloud computing (e.g., Apple's Siri) and edge computing (e.g., Google/Waymo's driverless car). FPGA-based DNN accelerators have demonstrated both superior flexibility and performance; in addition, for real-time inference with low batch size, FPGA is expected to achieve further performance improvement. However, the performance gain from the single-FPGA design is obstructed by the limited on-chip resource. In this paper, we employ multiple FPGAs to cooperatively run DNNs with the objective of achieving super-linear speed-up against single-FPGA design. In implementing such systems, we found two barriers that hinder us from achieving the design goal: (1) the lack of a clear partition scheme for each DNN layer to fully exploit parallelism, and (2) the insufficient bandwidth between the off-chip memory and the accelerator due to the growing size of DNNs. To tackle these issues, we propose a general framework, ``Super-LIP'', which can support different kinds of DNNs. In this paper, we take Convolutional Neural Network (CNN) as a vehicle to illustrate Super-LIP. We first formulate an accurate system-level model to support the exploration of best partition schemes.
Then, we develop a novel design methodology to effectively alleviate the heavy loads on memory bandwidth by moving traffic from memory bus to inter-FPGA links. We implement Super-LIP based on ZCU102 FPGA boards. Results demonstrate that Super-LIP with 2 FPGAs can achieve $ 3.48 \times $ speedup, compared to the state-of-the-art single-FPGA design. What is more, as the number of FPGAs scales up, the system latency can be further reduced while maintaining high energy efficiency.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "67",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wang:2019:ALA,
  author =       "Wei-Chen Wang and Yuan-Hao Chang and Tei-Wei Kuo and Chien-Chung Ho and Yu-Ming Chang and Hung-Sheng Chang",
  title =        "Achieving Lossless Accuracy with Lossy Programming for Efficient Neural-Network Training on {NVM}-Based Systems",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "68:1--68:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358191",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358191",
  abstract =     "Neural networks over conventional computing platforms are heavily restricted by the data volume and performance concerns. While non-volatile memory offers potential solutions to data volume issues, challenges must be faced over performance issues, especially with asymmetric read and write performance. Beside that, critical concerns over endurance must also be resolved before non-volatile memory could be used in reality for neural networks. This work addresses the performance and endurance concerns altogether by proposing a data-aware programming scheme.
We propose to consider neural network training jointly with respect to the data-flow and data-content points of view. In particular, methodologies with approximate results over Dual-SET operations were presented. Encouraging results were observed through a series of experiments, where great efficiency and lifetime enhancement is seen without sacrificing the result accuracy.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "68",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chen:2019:DAS,
  author =       "Zhengguo Chen and Quan Deng and Nong Xiao and Kirk Pruhs and Youtao Zhang",
  title =        "{DWMAcc}: Accelerating Shift-based {CNNs} with Domain Wall Memories",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "69:1--69:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358199",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358199",
  abstract =     "PIM (processing-in-memory) based hardware accelerators have shown great potentials in addressing the computation and memory access intensity of modern CNNs (convolutional neural networks). While adopting NVM (non-volatile memory) helps to further mitigate the storage and energy consumption overhead, adopting quantization, e.g., shift-based quantization, helps to tradeoff the computation overhead and the accuracy loss, integrating both NVM and quantization in hardware accelerators leads to sub-optimal acceleration. In this paper, we exploit the natural shift property of DWM (domain wall memory) to devise DWMAcc, a DWM-based accelerator with asymmetrical storage of weight and input data, to speed up the inference phase of shift-based CNNs.
DWMAcc supports flexible shift operations to enable fast processing with low performance and area overhead. We then optimize it with zero-sharing, input-reuse, and weight-share schemes. Our experimental results show that, on average, DWMAcc achieves $ 16.6 \times $ performance improvement and $ 85.6 \times $ energy consumption reduction over a state-of-the-art SRAM based design.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "69",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Dave:2019:DEP,
  author =       "Shail Dave and Youngbin Kim and Sasikanth Avancha and Kyoungwoo Lee and Aviral Shrivastava",
  title =        "{dMazeRunner}: Executing Perfectly Nested Loops on Dataflow Accelerators",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "70:1--70:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358198",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358198",
  abstract =     "Dataflow accelerators feature simplicity, programmability, and energy-efficiency and are visualized as a promising architecture for accelerating perfectly nested loops that dominate several important applications, including image and media processing and deep learning. Although numerous accelerator designs are being proposed, how to discover the most efficient way to execute the perfectly nested loop of an application onto computational and memory resources of a given dataflow accelerator (execution method) remains an essential and yet unsolved challenge. In this paper, we propose dMazeRunner --- to efficiently and accurately explore the vast space of the different ways to spatiotemporally execute a perfectly nested loop on dataflow accelerators (execution methods).
The novelty of dMazeRunner framework is in: (i) a holistic representation of the loop nests, that can succinctly capture the various execution methods, (ii) accurate energy and performance models that explicitly capture the computation and communication patterns, data movement, and data buffering of the different execution methods, and (iii) drastic pruning of the vast search space by discarding invalid solutions and the solutions that lead to the same cost. Our experiments on various convolution layers (perfectly nested loops) of popular deep learning applications demonstrate that the solutions discovered by dMazeRunner are on average $ 9.16 \times $ better in Energy-Delay-Product (EDP) and $ 5.83 \times $ better in execution time, as compared to prior approaches. With additional pruning heuristics, dMazeRunner reduces the search time from days to seconds with a mere 2.56\% increase in EDP, as compared to the optimal solution.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "70",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Spellini:2019:CDM,
  author =       "Stefano Spellini and Michele Lora and Franco Fummi and Sudipta Chattopadhyay",
  title =        "Compositional Design of Multi-Robot Systems Control Software on {ROS}",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "71:1--71:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358197",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358197",
  abstract =     "This paper presents a methodology that relies on Assume-Guarantee Contracts to decompose the problem of synthesizing control software for a multi-robot system.
Initially, each contract describes either a component (e.g., a robot) or an aspect of the system. Then, the design problem is decomposed into different synthesis and verification sub-problems, allowing to tackle the complexity involved in the design process. The design problem is then recomposed by exploiting the rigorousness provided by contracts. This allows us to achieve system-level simulation capable to be used for validating the entire design. Once validated, the software synthesized during the process can be integrated into Robot Operating System (ROS) nodes and executed using state-of-the-practice packages and tools for modern robotic systems. We apply the methodology to generate a control strategy for an autonomous goods transportation system. Our results show a massive reduction of the time required to obtain automatically the control software implementing a multi-robot mission.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "71",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Mendis:2019:ADU,
  author =       "Hashan Roshantha Mendis and Pi-Cheng Hsiu",
  title =        "Accumulative Display Updating for Intermittent Systems",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "72:1--72:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358190",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358190",
  abstract =     "Electrophoretic displays are ideal for self-powered systems, but currently require an uninterrupted power supply to carry out the full display update cycle.
Although sensible for battery-powered devices, when directly applied to intermittently-powered systems, guaranteeing display update atomicity usually results in repeated execution until completion or can incur high hardware/software overheads, heavy programmer intervention and large energy buffering requirements to provide sufficient display update energy. This paper introduces the concept, design and implementation of accumulative display updating, which relaxes the atomicity constraints of display updating, such that the display update process can be accumulatively completed across power cycles, without the need for sufficient energy for the entire display update. To allow for process logical continuity, we track the update progress during execution and facilitate a safe display shutdown procedure to overcome physical and operability issues related to abrupt power failure. Additionally, a context-aware updating policy is proposed to handle data freshness issues, where the delay in addressing new update requests can cause the display contents to be in conflict with new data available. Experimental results on a Texas Instruments device with an integrated electrophoretic display show that, compared to atomic display updating, our design can significantly increase accurate forward progress, decrease the average response time of display updating and reduce time and energy wastage when displaying fresh data.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "72",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Seyoum:2019:FFO,
  author =       "Biruk B. Seyoum and Alessandro Biondi and Giorgio C.
Buttazzo",
  title =        "{FLORA}: {FLoorplan} Optimizer for Reconfigurable Areas in {FPGAs}",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "73:1--73:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358202",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358202",
  abstract =     "Floorplanning is a mandatory step in the design of hardware accelerators for FPGA platforms, especially when adopting dynamic partial reconfiguration (DPR). This paper presents FLORA, an automated floorplanner based on optimization via Mixed-Integer Linear Programming (MILP). The floorplanning problem is solved by means of a novel fine-grained modeling strategy of FPGA resources. Furthermore, differently from other proposals, our approach takes into account several realistic Partial Reconfiguration (PR) floorplanning constraints on FPGAs. FLORA was compared against state-of-the-art floorplanners by means of benchmark suites, showing that it is capable of providing better performance in terms of resource consumption, maximum inter-region, wire-length, and running time required to produce the solutions. Finally, FLORA was utilized to generate placements for a partially-reconfigurable video processing engine that was implemented on a Xilinx Zynq-7020.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "73",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Moazzemi:2019:HFL,
  author =       "Kasra Moazzemi and Biswadip Maity and Saehanseul Yi and Amir M.
Rahmani and Nikil Dutt",
  title =        "{HESSLE-FREE}: Heterogeneous Systems Leveraging Fuzzy Control for Runtime Resource Management",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "74:1--74:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358203",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358203",
  abstract =     "As computing platforms increasingly embrace heterogeneity, runtime resource managers need to efficiently, dynamically, and robustly manage shared resources (e.g., cores, power budgets, memory bandwidth). To address the complexities in heterogeneous systems, state-of-the-art techniques that use heuristics or machine learning have been proposed. On the other hand, conventional control theory can be used for formal guarantees, but may face unmanageable complexity for modeling system dynamics of complex heterogeneous systems. We address this challenge through HESSLE-FREE (Heterogeneous Systems Leveraging Fuzzy Control for Runtime Resource Management): an approach leveraging fuzzy control theory that combines the strengths of classical control theory together with heuristics to form a light-weight, agile, and efficient runtime resource manager for heterogeneous systems. We demonstrate the efficacy of HESSLE-FREE executing on a NVIDIA Jetson TX2 platform (containing a heterogeneous multi-processor with a GPU) to show that HESSLE-FREE: (1) provides opportunity for optimization in the controller and stability analysis to enhance the confidence in the reliability of the system; (2) coordinates heterogeneous compute units to achieve desired objectives (e.g., QoS, optimal power references, FPS) efficiently and with lower complexity, and (3) eases the burden of system specification.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "74",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Vashist:2019:UTS,
  author =       "Abhishek Vashist and Andrew Keats and Sai Manoj Pudukotai Dinakarrao and Amlan Ganguly",
  title =        "Unified Testing and Security Framework for Wireless Network-on-Chip Enabled Multi-Core Chips",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "75:1--75:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358212",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358212",
  abstract =     "On-chip wireless interconnects have been demonstrated to improve the performance and energy consumption of data communication in Network-on-Chips (NoCs). However, the wireless interfaces (WIs) can be defective, rendering these broken links severely affect the performance. This makes manufacturing test of the WIs critical. While analog testing of the transceivers is possible, such methodologies are impractical in a Wireless NoC (WiNoC) due to large overheads. In addition to testing, security is another prominent challenge in WiNoCs, as the security breach can happen due to embedded hardware Trojans or through external attacker exploiting the wireless medium. The typical security measures used in general wireless networks are not practical in a WiNoC due to unique network architectures and performance requirements of such a system. However, both testing and security defense can potentially leverage a basic monitoring framework which, can detect malfunctions or anomalies. Based on this idea, we propose a unified architecture for testing and attack detection and protection of on-chip wireless interconnects.
We adopt a Built-In-Self Test (BIST) methodology to enable online monitoring of the wireless interconnects which can also be reused for monitoring the security threats. We focus on manufacturing defects of the WIs for testing and persistent jamming attack for the security measures, as this kind of attack is most likely on wireless communication systems. The BIST methodology is capable of detecting faults in the wireless links with a low aliasing probability of $ 2.32 \times 10^{-10} $. Additionally, the proposed unified architecture is able to detect the persistent jamming with an accuracy of 99.87\% and suffer $<$ 3\% communication bandwidth degradation even in the presence of attacks from either internal or external sources.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "75",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Dugo:2019:CLC,
  author =       "Alexy Torres Aurora Dugo and Jean-Baptiste Lefoul and Felipe Gohring {De Magalhaes} and Dahman Assal and Gabriela Nicolescu",
  title =        "Cache Locking Content Selection Algorithms for {ARINC-653} Compliant {RTOS}",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "76:1--76:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358196",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358196",
  abstract =     "Avionic software is the subject of stringent real time, determinism and safety constraints. Software designers face several challenges, one of them being the interferences that appear in common situations, such as resource sharing. The interferences introduce non-determinism and delays in execution time. One of the main interference prone resources are cache memories.
In single-core processors, caches comprise multiple private levels. This breaks the isolation principle imposed by avionic standards, such as the ARINC-653. This standard defines partitioned architectures where one partition should never directly interfere with another one. In cache-based architectures, one partition can modify the cache content of another partition. In this paper, we propose a method based on cache locking to reduce the non-determinism and the contention on lower level memories while improving the time performances.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "76",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Deshwal:2019:MMO,
  author =       "Aryan Deshwal and Nitthilan Kanappan Jayakodi and Biresh Kumar Joardar and Janardhan Rao Doppa and Partha Pratim Pande",
  title =        "{MOOS}: a Multi-Objective Design Space Exploration and Optimization Framework for {NoC} Enabled Manycore Systems",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "77:1--77:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358206",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358206",
  abstract =     "The growing needs of emerging applications has posed significant challenges for the design of optimized manycore systems. Network-on-Chip (NoC) enables the integration of a large number of processing elements (PEs) in a single die. To design optimized manycore systems, we need to establish suitable trade-offs among multiple objectives including power, performance, and thermal.
Therefore, we consider multi-objective design space exploration (MO-DSE) problems arising in the design of NoC-enabled manycore systems: placement of PEs and communication links to optimize two or more objectives (e.g., latency, energy, and throughput). Existing algorithms to solve MO-DSE problems suffer from scalability and accuracy challenges as size of the design space and the number of objectives grow. In this paper, we propose a novel framework referred as Multi-Objective Optimistic Search (MOOS) that performs adaptive design space exploration using a data-driven model to improve the speed and accuracy of multi-objective design optimization process. We apply MOOS to design both 3D heterogeneous and homogeneous manycore systems using Rodinia, PARSEC, and SPLASH2 benchmark suites. We demonstrate that MOOS improves the speed of finding solutions compared to state-of-the-art methods by up to 13X while uncovering designs that are up to 20\% better in terms of NoC. The optimized 3D manycore systems improve the EDP up to 38\% when compared to 3D mesh-based designs optimized for the placement of PEs.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "77",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Smirnov:2019:IGM,
  author =       "Fedor Smirnov and Behnaz Pourmohseni and Michael Gla{\ss} and J{\"u}rgen Teich",
  title =        "{IGOR}, Get Me the Optimum!
{Prioritizing} Important Design Decisions During the {DSE} of Embedded Systems",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "78:1--78:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358204",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358204",
  abstract =     "Design Space Exploration (DSE) techniques for complex embedded systems must cope with a huge variety of applications and target architectures as well as a wide spectrum of objectives and constraints. In particular, existing design automation approaches are either problem-independent, in that they do not exploit any knowledge about the optimization problem at hand, or are tailored to specific a priori assumptions about the problem and/or a specific set of design objectives. While the latter are only applicable within a very limited scope of design problems, the former may struggle to deliver high-quality solutions for problems with large design spaces and/or complex design objectives. As a remedy, we propose Importance-Guided Order Rearrangement (IGOR) as a novel approach for DSE of embedded systems. Instead of relying on an a priori problem knowledge, IGOR uses a machine-learning-inspired technique to dynamically analyze the importance of design decisions, i.e., the impact that these decisions---within the specific problem that is being optimized---have on the quality of explored problem solutions w.r.t. the given design objectives. Throughout the DSE, IGOR uses this information to guide the optimization towards the most promising regions of the design space.
Experimental results for a variety of applications from different domains of embedded computing and for different optimization scenarios give evidence that the proposed approach is both scalable and adaptable, as it can be used for the optimization of systems described by several thousands constraints, where it outperforms both problem-specific and problem-independent optimization approaches and achieves $ \epsilon $-dominance improvements of up to 95\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "78",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Cheng:2019:AVE,
  author =       "Zhongqi Cheng and Rainer D{\"o}mer",
  title =        "Analyzing Variable Entanglement for Parallel Simulation of {SystemC TLM-2.0} Models",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "79:1--79:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358194",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358194",
  abstract =     "The SystemC TLM-2.0 standard is widely used in modern electronic system level design for better interoperability and higher simulation speed. However, TLM-2.0 has been identified as an obstacle for parallel SystemC simulation due to the disappearance of channels. Without a containment construct, simulation threads are permitted to directly access data of other modules and that makes it difficult to synchronize such accesses as required by the SystemC execution semantics. In this paper, we propose a compile time approach to statically analyze potential conflicts among threads in SystemC TLM-2.0 loosely- and approximately-timed models.
We introduce a new Socket Call Path technique which provides the compiler with socket binding information for precise static analysis. We also propose an algorithm to analyze entangled variable pairs. Experimental results show that our approach is able to support automatically safe parallel simulation of SystemC models with TLM-2.0 Blocking Transport Interface, Direct Memory Interface and Non-blocking Transport Interface, resulting in impressive simulation speeds.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "79", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Seo:2019:ETM, author = "Minjun Seo and Fadi Kurdahi", title = "Efficient Tracing Methodology Using Automata Processor", journal = j-TECS, volume = "18", number = "5s", pages = "80:1--80:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358200", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358200", abstract = "Tracing or trace interface has been used in various ways to find system defects or bugs. As embedded systems are increasingly used in safety-critical applications, tracing can provide useful information during system execution at runtime. Non-intrusive tracing that does not affect system performance has become especially important, but unfortunately, the biggest obstacle to this approach was the vast amount of real-time trace data, making it challenging to address complex requirements with relatively limited hardware implementations. Automata processors can be programmed with a memory-like structure of automata and have a structure specific to streaming data, large capacity, and parallel processing functions. 
This paper promotes the idea of high-level system-on-chip monitoring using automata processors. We used a safety-critical pacemaker application in the experiments, described timed automata (TA)-based requirements, and tested intentionally injected 4,000 random failures. The TA model converted for Automata Processor to monitor system, correctness, and safety properties achieved 100\% failure detection rate in the experiment, and the detected failure is reported as fast enough to allow enough extent for failure recovery.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "80", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Brais:2019:AAM, author = "Hadi Brais and Preeti Ranjan Panda", title = "{Alleria}: an Advanced Memory Access Profiling Framework", journal = j-TECS, volume = "18", number = "5s", pages = "81:1--81:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358193", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358193", abstract = "Application analysis and simulation tools are used extensively by embedded system designers to improve existing optimization techniques or develop new ones. We propose the Alleria framework to make it easier for designers to comprehensively collect critical information such as virtual and physical memory addresses, accessed values, and thread schedules about one or more target applications. Such profilers often incur substantial performance overheads that are orders of magnitude larger than native execution time. We discuss how that overhead can be significantly reduced using a novel profiling mechanism called adaptive profiling. 
We develop a heuristic-based adaptive profiling mechanism and evaluate its performance using single-threaded and multi-threaded applications. The proposed technique can improve profiling throughput by up to 145\% and by 37\% on an average, enabling Alleria to be used to comprehensively profile applications with a throughput of over 3 million instructions per second.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "81", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Bhardwaj:2019:MCA, author = "Kartikeya Bhardwaj and Ching-Yi Lin and Anderson Sartor and Radu Marculescu", title = "Memory- and Communication-Aware Model Compression for Distributed Deep Learning Inference on {IoT}", journal = j-TECS, volume = "18", number = "5s", pages = "82:1--82:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358205", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/datacompression.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358205", abstract = "Model compression has emerged as an important area of research for deploying deep learning models on Internet-of-Things (IoT). However, for extremely memory-constrained scenarios, even the compressed models cannot fit within the memory of a single device and, as a result, must be distributed across multiple devices. This leads to a distributed inference paradigm in which memory and communication costs represent a major bottleneck. Yet, existing model compression techniques are not communication-aware. 
Therefore, we propose Network of Neural Networks (NoNN), a new distributed IoT learning paradigm that compresses a large pretrained `teacher' deep network into several disjoint and highly-compressed `student' modules, without loss of accuracy. Moreover, we propose a network science-based knowledge partitioning algorithm for the teacher model, and then train individual students on the resulting disjoint partitions. Extensive experimentation on five image classification datasets, for user-defined memory/performance budgets, show that NoNN achieves higher accuracy than several baselines and similar accuracy as the teacher model, while using minimal communication among students. Finally, as a case study, we deploy the proposed model for CIFAR-10 dataset on edge devices and demonstrate significant improvements in memory footprint (up to $ 24 \times $), performance (up to $ 12 \times $), and energy per node (up to $ 14 \times $) compared to the large teacher model. We further show that for distributed inference on multiple edge devices, our proposed NoNN model results in up to $ 33 \times $ reduction in total latency w.r.t. a state-of-the-art model compression baseline.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "82", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Barijough:2019:QLA, author = "Kamyar Mirzazad Barijough and Zhuoran Zhao and Andreas Gerstlauer", title = "Quality\slash Latency-Aware Real-time Scheduling of Distributed Streaming {IoT} Applications", journal = j-TECS, volume = "18", number = "5s", pages = "83:1--83:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358209", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358209", abstract = "Embedded systems are increasingly networked and distributed, often, such as in the Internet of Things (IoT), over open networks with potentially unbounded delays. A key challenge is the need for real-time guarantees over such inherently unreliable and unpredictable networks. Generally, timeouts are used to provide timing guarantees while trading off data losses and quality. The schedule of distributed task executions and network timeouts thereby determines a fundamental latency-quality trade-off that is, however, not taken into account by existing scheduling algorithms. In this paper, we propose an approach for scheduling of distributed, real-time streaming applications under quality-latency goals. We formulate this as a problem of analytically deriving a static worst-case schedule of a given distributed dataflow graph that minimizes quality loss while meeting guaranteed latency constraints. Towards this end, we first develop a quality model that estimates SNR of distributed streaming applications under given network characteristics and an overall linearity assumption. Using this quality model, we then formulate and solve the scheduling of distributed dataflow graphs as a numerical optimization problem. 
Simulation results with random graphs show that quality/latency-aware scheduling improves SNR over a baseline schedule by 50\% on average. When applied to a distributed neural network application for handwritten digit recognition, our scheduling methodology can improve classification accuracy by 10\% over a naive distribution under tight latency constraints.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "83", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wang:2019:DES, author = "Youchao Wang and Sam Willis and Vasileios Tsoutsouras and Phillip Stanley-Marbell", title = "Deriving Equations from Sensor Data Using Dimensional Function Synthesis", journal = j-TECS, volume = "18", number = "5s", pages = "84:1--84:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358218", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358218", abstract = "We present a new method for deriving functions that model the relationship between multiple signals in a physical system. The method, which we call dimensional function synthesis, applies to data streams where the dimensions of the signals are known. The method comprises two phases: a compile-time synthesis phase and a subsequent calibration using sensor data. We implement dimensional function synthesis and use the implementation to demonstrate efficiently summarizing multi-modal sensor data for two physical systems using 90 laboratory experiments and 10,000 synthetic idealized measurements. We evaluate the performance of the compile-time phase of dimensional function synthesis as well as the calibration phase overhead, inference latency, and accuracy of the models our method generates. 
The results show that our technique can generate models in less than 300 ms on average across all the physical systems we evaluated. When calibrated with sensor data, our models outperform traditional regression and neural network models in inference accuracy in all the cases we evaluated. In addition, our models perform better in training latency (over $ 8660 \times $ improvement) and required arithmetic operations in inference (over $ 34 \times $ improvement). These significant gains are largely the result of exploiting information on the physics of signals that has hitherto been ignored.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "84", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Dai:2019:DMS, author = "Xiaotian Dai and Wanli Chang and Shuai Zhao and Alan Burns", title = "A Dual-Mode Strategy for Performance-Maximisation and Resource-Efficient {CPS} Design", journal = j-TECS, volume = "18", number = "5s", pages = "85:1--85:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358213", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358213", abstract = "The emerging scenarios of cyber-physical systems (CPS), such as autonomous vehicles, require implementing complex functionality with limited resources, as well as high performances. This paper considers a common setup in which multiple control and non-control tasks share one processor, and proposes a dual-mode strategy. The control task switches between two sampling periods when rejecting (coping with) a disturbance. 
We create an optimisation framework looking for the switching sampling periods and time instants that maximise the control performance (indexed by settling time) and resource efficiency (indexed by the number of tasks that are schedulable on the processor). The latter objective is enabled with schedulability analysis tailored for the dual-mode model. Experimental results show that (i) given a set of tasks, the proposed strategy improves the control performances whilst retaining schedulability; and (ii) given requirements on the control performances, the proposed strategy is able to schedule more tasks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "85", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Passerone:2019:CEC, author = "Roberto Passerone and {\'I}{\~n}igo {\'I}ncer Romeo and Alberto L. Sangiovanni-Vincentelli", title = "Coherent Extension, Composition, and Merging Operators in Contract Models for System Design", journal = j-TECS, volume = "18", number = "5s", pages = "86:1--86:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358216", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358216", abstract = "Contract models have been proposed to promote and facilitate reuse and distributed development. In this paper, we cast contract models into a coherent formalism used to derive general results about the properties of their operators. We study several extensions of the basic model, including the distinction between weak and strong assumptions and maximality of the specification. We then analyze the disjunction and conjunction operators, and show how they can be broken up into a sequence of simpler operations. 
This leads to the definition of a new contract viewpoint merging operator, which better captures the design intent in contrast to the more traditional conjunction. The adjoint operation, which we call separation, can be used to re-partition the specification into different viewpoints. We show the symmetries of these operations with respect to composition and quotient.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "86", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Bataineh:2019:EDL, author = "Omar Bataineh and David S. Rosenblum and Mark Reynolds", title = "Efficient Decentralized {LTL} Monitoring Framework Using Tableau Technique", journal = j-TECS, volume = "18", number = "5s", pages = "87:1--87:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358219", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358219", abstract = "This paper presents a novel framework for decentralized monitoring of Linear Temporal Logic (LTL) formulas, under the situation where processes are synchronous and the formula is represented as a tableau. The tableau technique allows one to construct a semantic tree for the input LTL formula, which can be used to optimize the decentralized monitoring of LTL in various ways. Given a system P and an LTL formula $ \varphi $, we construct a tableau $ T_\varphi $. The tableau $ T_\varphi $ is used for two purposes: (a) to synthesize an efficient round-robin communication policy for processes, and (b) to find the minimal ways to decompose the formula and communicate observations of processes in an efficient way. 
In our framework, processes can propagate truth values of both atomic and compound formulas (non-atomic formulas) depending on the syntactic structure of the input LTL formula and the observation power of processes. We demonstrate that this approach of decentralized monitoring based on tableau construction is more straightforward, more flexible, and more likely to yield efficient solutions than alternative approaches.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "87", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Baumeister:2019:FSM, author = "Jan Baumeister and Bernd Finkbeiner and Maximilian Schwenger and Hazem Torfah", title = "{FPGA} Stream-Monitoring of Real-time Properties", journal = j-TECS, volume = "18", number = "5s", pages = "88:1--88:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358220", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358220", abstract = "An essential part of cyber-physical systems is the online evaluation of real-time data streams. Especially in systems that are intrinsically safety-critical, a dedicated monitoring component inspecting data streams to detect problems at runtime greatly increases the confidence in a safe execution. Such a monitor needs to be based on a specification language capable of expressing complex, high-level properties using only the accessible low-level signals. Moreover, tight constraints on computational resources exacerbate the requirements on the monitor. Thus, several existing approaches to monitoring are not applicable due to their dependence on an operating system. We present an FPGA-based monitoring approach by compiling an RTLola specification into synthesizable VHDL code.
RTLola is a stream-based specification language capable of expressing complex real-time properties while providing an upper bound on the execution time and memory requirements. The statically determined memory bound allows for a compilation to an FPGA with a fixed size. An advantage of FPGAs is a simple integration process in existing systems and superb executing time. The compilation results in a highly parallel implementation thanks to the modular nature of RTLola specifications. This further increases the maximal event rate the monitor can handle.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "88", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Bajczi:2019:WMP, author = "Levente Bajczi and Andr{\'a}s V{\"o}r{\"o}s and Vince Moln{\'a}r", title = "Will My Program Break on This Faulty Processor?: {Formal} Analysis of Hardware Fault Activations in Concurrent Embedded Software", journal = j-TECS, volume = "18", number = "5s", pages = "89:1--89:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358238", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358238", abstract = "Formal verification is approaching a point where it will be reliably applicable to embedded software. Even though formal verification can efficiently analyze multi-threaded applications, multi-core processors are often considered too dangerous to use in critical systems, despite the many benefits they can offer. One reason is the advanced memory consistency model of such CPUs. Nowadays, most software verifiers assume strict sequential consistency, which is also the na{\"\i}ve view of programmers. 
Modern multi-core processors, however, rarely guarantee this assumption by default. In addition, complex processor architectures may easily contain design faults. Thanks to the recent advances in hardware verification, these faults are increasingly visible and can be detected even in existing processors, giving an opportunity to compensate for the problem in software. In this paper, we propose a generic approach to consider inconsistent behavior of the hardware in the analysis of software. Our approach is based on formal methods and can be used to detect the activation of existing hardware faults on the application level and facilitate their mitigation in software. The approach relies heavily on recent results of model checking and hardware verification and offers new, integrative research directions. We propose a partial solution based on existing model checking tools to demonstrate feasibility and evaluate their performance in this context.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "89", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lee:2019:TAS, author = "Youngmoon Lee and Kang G. Shin and Hoon Sung Chwa", title = "Thermal-Aware Scheduling for Integrated {CPUs--GPU} Platforms", journal = j-TECS, volume = "18", number = "5s", pages = "90:1--90:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358235", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358235", abstract = "As modern embedded systems like cars need high-power integrated CPUs--GPU SoCs for various real-time applications such as lane or pedestrian detection, they face greater thermal problems than before, which may, in turn, incur higher failure rate and cooling cost. 
We demonstrate, via experimentation on a representative CPUs--GPU platform, the importance of accounting for two distinct thermal characteristics---the platform's temperature imbalance and different power dissipations of different tasks---in real-time scheduling to avoid any burst of power dissipations while guaranteeing all timing constraints. To achieve this goal, we propose a new Real-Time Thermal-Aware Scheduling (RT-TAS) framework. We first capture different CPU cores' temperatures caused by different GPU power dissipations (i.e., CPUs--GPU thermal coupling) with core-specific thermal coupling coefficients. We then develop thermally-balanced task-to-core assignment and CPUs--GPU co-scheduling. The former addresses the platform's temperature imbalance by efficiently distributing the thermal load across cores while preserving scheduling feasibility. Building on the thermally-balanced task assignment, the latter cooperatively schedules CPU and GPU computations to avoid simultaneous peak power dissipations on both CPUs and GPU, thus mitigating excessive temperature rises while meeting task deadlines. We have implemented and evaluated RT-TAS on an automotive embedded platform to demonstrate its effectiveness in reducing the maximum temperature by 6--12.2${}^\circ $C over existing approaches without violating any task deadline.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "90", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chen:2019:TAF, author = "Peng Chen and Weichen Liu and Xu Jiang and Qingqiang He and Nan Guan", title = "Timing-Anomaly Free Dynamic Scheduling of Conditional {DAG} Tasks on Multi-Core Systems", journal = j-TECS, volume = "18", number = "5s", pages = "91:1--91:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358236", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358236", abstract = "In this paper, we propose a novel approach to schedule conditional DAG parallel tasks, with which we can derive safe response time upper bounds significantly better than the state-of-the-art counterparts. The main idea is to eliminate the notorious timing anomaly in scheduling parallel tasks by enforcing certain order constraints among the vertices, and thus the response time bound can be accurately predicted off-line by somehow ``simulating'' the runtime scheduling. A key challenge to apply the timing-anomaly free scheduling approach to conditional DAG parallel tasks is that at runtime it may generate exponentially many instances from a conditional DAG structure. To deal with this problem, we develop effective abstractions, based on which a safe response time upper bound is computed in polynomial time. We also develop algorithms to explore the vertex orders to shorten the response time bound. The effectiveness of the proposed approach is evaluated by experiments with randomly generated DAG tasks with different parameter configurations.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "91", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wang:2019:SVH, author = "Yu Wang and Mojtaba Zarei and Borzoo Bonakdarpour and Miroslav Pajic", title = "Statistical Verification of Hyperproperties for Cyber-Physical Systems", journal = j-TECS, volume = "18", number = "5s", pages = "92:1--92:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358232", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358232", abstract = "Many important properties of cyber-physical systems (CPS) are defined upon the relationship between multiple executions simultaneously in continuous time. Examples include probabilistic fairness and sensitivity to modeling errors (i.e., parameters changes) for real-valued signals. These requirements can only be specified by hyperproperties. In this article, we focus on verifying probabilistic hyperproperties for CPS. To cover a wide range of modeling formalisms, we first propose a general model of probabilistic uncertain systems (PUSs) that unify commonly studied CPS models such as continuous-time Markov chains (CTMCs) and probabilistically parametrized Hybrid I/O Automata (P$^2$HIOA). To formally specify hyperproperties, we propose a new temporal logic, hyper probabilistic signal temporal logic (HyperPSTL) that serves as a hyper and probabilistic version of the conventional signal temporal logic (STL). Considering the complexity of real-world systems that can be captured as PUSs, we adopt a statistical model checking (SMC) approach for their verification.
We develop a new SMC technique based on the direct computation of significance levels of statistical assertions for HyperPSTL specifications, which requires no a priori knowledge on the indifference margin. Then, we introduce SMC algorithms for HyperPSTL specifications on the joint probabilistic distribution of multiple paths, as well as specifications with nested probabilistic operators quantifying different paths, which cannot be handled by existing SMC algorithms. Finally, we show the effectiveness of our SMC algorithms on CPS benchmarks with varying levels of complexity, including the Toyota Powertrain Control System.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "92", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Luo:2019:PFC, author = "Zhengxiong Luo and Feilong Zuo and Yu Jiang and Jian Gao and Xun Jiao and Jiaguang Sun", title = "{Polar}: Function Code Aware Fuzz Testing of {ICS} Protocol", journal = j-TECS, volume = "18", number = "5s", pages = "93:1--93:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358227", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358227", abstract = "Industrial Control System (ICS) protocols are widely used to build communications among system components. Compared with common internet protocols, ICS protocols have more control over remote devices by carrying a specific field called ``function code'', which assigns what the receive end should do. Therefore, it is of vital importance to ensure their correctness. However, traditional vulnerability detection techniques such as fuzz testing are challenged by the increasing complexity of these diverse ICS protocols. 
In this paper, we present a function code aware fuzzing framework --- Polar, which automatically extracts semantic information from the ICS protocol and utilizes this information to accelerate security vulnerability detection. Based on static analysis and dynamic taint analysis, Polar initiates the values of the function code field and identifies some vulnerable operations. Then, novel semantic aware mutation and selection strategies are designed to optimize the fuzzing procedure. For evaluation, we implement Polar on top of two popular fuzzers --- AFL and AFLFast, and conduct experiments on several widely used ICS protocols such as Modbus, IEC104, and IEC 61850. Results show that, compared with AFL and AFLFast, Polar achieves the same code coverage and bug detection numbers at the speed of 1.5X--12X. It also gains increase with 0\%--91\% more paths within 24 hours. Furthermore, Polar has exposed 10 previously unknown vulnerabilities in those protocols, 6 of which have been assigned unique CVE identifiers in the US National Vulnerability Database.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "93", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Sun:2019:STC, author = "Youcheng Sun and Xiaowei Huang and Daniel Kroening and James Sharp and Matthew Hill and Rob Ashmore", title = "Structural Test Coverage Criteria for Deep Neural Networks", journal = j-TECS, volume = "18", number = "5s", pages = "94:1--94:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358233", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358233", abstract = "Deep neural networks (DNNs) have a wide range of applications, and software employing them must be thoroughly tested, especially in safety-critical domains. However, traditional software test coverage metrics cannot be applied directly to DNNs. In this paper, inspired by the MC/DC coverage criterion, we propose a family of four novel test coverage criteria that are tailored to structural features of DNNs and their semantics. We validate the criteria by demonstrating that test inputs that are generated with guidance by our proposed coverage criteria are able to capture undesired behaviours in a DNN. Test cases are generated using a symbolic approach and a gradient-based heuristic search. By comparing them with existing methods, we show that our criteria achieve a balance between their ability to find bugs (proxied using adversarial examples and correlation with functional coverage) and the computational cost of test input generation. Our experiments are conducted on state-of-the-art DNNs obtained using popular open source datasets, including MNIST, CIFAR-10 and ImageNet.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "94", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lin:2019:GBM, author = "Yi-Ting Lin and Hsiang Hsu and Shang-Chien Lin and Chung-Wei Lin and Iris Hui-Ru Jiang and Changliu Liu", title = "Graph-Based Modeling, Scheduling, and Verification for Intersection Management of Intelligent Vehicles", journal = j-TECS, volume = "18", number = "5s", pages = "95:1--95:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358221", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358221", abstract = "Intersection management is one of the most representative applications of intelligent vehicles with connected and autonomous functions. The connectivity provides environmental information that a single vehicle cannot sense, and the autonomy supports precise vehicular control that a human driver cannot achieve. Intersection management solves the fundamental conflict resolution problem for vehicles --- two vehicles should not appear at the same location at the same time, and, if they intend to do that, an order should be decided to optimize certain objectives such as the traffic throughput or smoothness. In this paper, we first propose a graph-based model for intersection management. The model is general and applicable to different granularities of intersections and other conflicting scenarios. We then derive formal verification approaches which can guarantee deadlock-freeness. Based on the graph-based model and the verification approaches, we develop a centralized cycle removal algorithm for the graph-based model to schedule vehicles to go through the intersection safely (without collisions) and efficiently without deadlocks.
Experimental results demonstrate the expressiveness of the proposed model and the effectiveness and efficiency of the proposed algorithm.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "95", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kyriakis:2019:SMR, author = "Panagiotis Kyriakis and Jyotirmoy V. Deshmukh and Paul Bogdan", title = "Specification Mining and Robust Design under Uncertainty: a Stochastic Temporal Logic Approach", journal = j-TECS, volume = "18", number = "5s", pages = "96:1--96:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358231", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358231", abstract = "In this paper, we propose Stochastic Temporal Logic (StTL) as a formalism for expressing probabilistic specifications on time-varying behaviors of controlled stochastic dynamical systems. To make StTL a more effective specification formalism, we introduce the quantitative semantics for StTL to reason about the robust satisfaction of an StTL specification by a given system. Additionally, we propose using the robustness value as the objective function to be maximized by a stochastic optimization algorithm for the purpose of controller design. Finally, we formulate an algorithm for parameter inference for Parametric-StTL specifications, which allows specifications to be mined from output traces of the underlying system. We demonstrate and validate our framework on two case studies inspired by the automotive domain.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "96", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ghosh:2019:RRS, author = "Bineet Ghosh and Parasara Sridhar Duggirala", title = "Robust Reachable Set: Accounting for Uncertainties in Linear Dynamical Systems", journal = j-TECS, volume = "18", number = "5s", pages = "97:1--97:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358229", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358229", abstract = "Reachable set computation is one of the primary techniques for safety verification of linear dynamical systems. In reality the underlying dynamics have uncertainties like parameter variations or modeling uncertainties. Therefore, the reachable set computation must consider the uncertainties in the dynamics to be useful i.e. the computed reachable set should be over or under approximation if not exact. This paper presents a technique to compute reachable set of linear dynamical systems with uncertainties. First, we introduce a construct called support of a matrix. Using this construct, we present a set of sufficient conditions for which reachable set for uncertain linear system can be computed efficiently; and safety verification can be performed using bi-linear programming. Finally, given a linear dynamical system, we compute robust reachable set, which accounts for all possible uncertainties that can be handled by the sufficient conditions presented. Experimental evaluation on benchmarks reveal that our algorithm is computationally very efficient.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "97", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lal:2019:CGA, author = "Ratan Lal and Pavithra Prabhakar", title = "Counterexample Guided Abstraction Refinement for Polyhedral Probabilistic Hybrid Systems", journal = j-TECS, volume = "18", number = "5s", pages = "98:1--98:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358217", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/python.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358217", abstract = "We consider the problem of safety analysis of probabilistic hybrid systems, which capture discrete, continuous and probabilistic behaviors. We present a novel counterexample guided abstraction refinement (CEGAR) algorithm for a subclass of probabilistic hybrid systems, called polyhedral probabilistic hybrid systems (PHS), where the continuous dynamics is specified using a polyhedral set within which the derivatives of the continuous executions lie. Developing a CEGAR algorithm for PHS is complex owing to the branching behavior due to the probabilistic transitions, and the infinite state space due to the real-valued variables. We present a practical algorithm by choosing a succinct representation for counterexamples, an efficient validation algorithm and a constructive method for refinement that ensures progress towards the elimination of a spurious abstract counterexample. The technical details for refinement are non-trivial since there are no clear disjoint sets for separation. 
We have implemented our algorithm in a Python toolbox called Procegar; our experimental analysis demonstrates the benefits of our method in terms of successful verification results, as well as bug finding.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "98", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Duggirala:2019:ASR, author = "Parasara Sridhar Duggirala and Stanley Bak", title = "Aggregation Strategies in Reachable Set Computation of Hybrid Systems", journal = j-TECS, volume = "18", number = "5s", pages = "99:1--99:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358214", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358214", abstract = "Computing the set of reachable states is a widely used technique for proving that a hybrid system satisfies its safety specification. Flow-pipe construction methods interleave phases of computing continuous successors and phases of computing discrete successors. Directly doing this leads to a combinatorial explosion problem, though, as with each discrete successor there may be an interval of time where the transition can occur, so that the number of paths becomes exponential in the number of discrete transitions. For this reason, most reachable set computation tools implement some form of set aggregation for discrete transitions, such as, performing a template-based overapproximation or convex hull aggregation. These aggregation methods, however, in theory can lead to unbounded error, and in practice are often the root cause of why a safety specification cannot be proven. This paper proposes techniques for improving the accuracy of the aggregation operations performed for reachable set computation. 
First, we present two aggregation strategies over generalized stars, namely convex hull aggregation and template based aggregation. Second, we perform adaptive deaggregation using a data structure called Aggregated Directed Acyclic Graph (AGGDAG). Our deaggregation strategy is driven by counterexamples and hence has soundness and relative completeness guarantees. We demonstrate the computational benefits of our approach through two case studies involving satellite rendezvous and gearbox meshing.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "99", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Salamati:2019:MEM, author = "Mahmoud Salamati and Rocco Salvia and Eva Darulova and Sadegh Soudjani and Rupak Majumdar", title = "Memory-Efficient Mixed-Precision Implementations for Robust Explicit Model Predictive Control", journal = j-TECS, volume = "18", number = "5s", pages = "100:1--100:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358223", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358223", abstract = "We propose an optimization for space-efficient implementations of explicit model-predictive controllers (MPC) for robust control of linear time-invariant (LTI) systems on embedded platforms. We obtain an explicit-form robust model-predictive controller as a solution to a multi-parametric linear programming problem. The structure of the controller is a polyhedral decomposition of the control domain, with an affine map for each domain. While explicit MPC is suited for embedded devices with low computational power, the memory requirements for such controllers can be high. 
We provide an optimization algorithm for a mixed-precision implementation of the controller, where the deviation of the implemented controller from the original one is within the robustness margin of the robust control problem. The core of the mixed-precision optimization is an iterative static analysis that co-designs a robust controller and a low-bitwidth approximation that is statically guaranteed to always be within the robustness margin of the original controller. We have implemented our algorithm and show on a set of benchmarks that our optimization can reduce space requirements by up to 20.9\% and on average by 12.6\% compared to a minimal uniform precision implementation of the original controller.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "100", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Arrestier:2019:NRD, author = "Florian Arrestier and Karol Desnos and Eduardo Juarez and Daniel Menard", title = "Numerical Representation of Directed Acyclic Graphs for Efficient Dataflow Embedded Resource Allocation", journal = j-TECS, volume = "18", number = "5s", pages = "101:1--101:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358225", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358225", abstract = "Stream processing applications running on Heterogeneous Multi-Processor Systems on Chips (HMPSoCs) require efficient resource allocation and management, both at compile-time and at runtime. To cope with modern adaptive applications whose behavior can not be exhaustively predicted at compile-time, runtime managers must be able to take resource allocation decisions on-the-fly, with a minimum overhead on application performance. 
Resource allocation algorithms often rely on an internal modeling of an application. Directed Acyclic Graphs (DAGs) are the most commonly used models for capturing control and data dependencies between tasks. DAGs are notably often used as an intermediate representation for deploying applications modeled with a dataflow Model of Computation (MoC) on HMPSoCs. Building such intermediate representation at runtime for massively parallel applications is costly both in terms of computation and memory overhead. In this paper, an intermediate representation of DAGs for resource allocation is presented. This new representation shows improved performance for run-time analysis of dataflow graphs with less overhead in both computation time and memory footprint. The performances of the proposed representation are evaluated on a set of computer vision and machine learning applications.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "101", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ziegler:2019:HSE, author = "Andreas Ziegler and Julian Geus and Bernhard Heinloth and Timo H{\"o}nig and Daniel Lohmann", title = "{Honey}, {I} Shrunk the {ELFs}: Lightweight Binary Tailoring of Shared Libraries", journal = j-TECS, volume = "18", number = "5s", pages = "102:1--102:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358222", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358222", abstract = "In the embedded domain, industrial sectors (i.e., automotive industry, avionics) are undergoing radical changes. They broadly adopt commodity hardware and move away from special-purpose control units.
During this transition, heterogeneous software components are consolidated to run on commodity operating systems. To efficiently consolidate such components, a modular encapsulation of common functionality into reusable binary files (i.e., shared libraries) is essential. However, shared libraries are often unnecessarily large as they entail a lot of generic functionality that is not required in a narrowly defined scenario. As the source code of proprietary components is often unavailable and the industry is heading towards binary-only distribution, we propose an approach towards lightweight binary tailoring. As demonstrated in the evaluation, lightweight binary tailoring effectively reduces the amount of code in all shared libraries on a Linux-based system by 63 percent and shrinks their files by 17 percent. The reduction in size is beneficial to cut down costs (e.g., lower storage and memory footprint) and eases code analyses that are necessary for code audits.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "102", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Pan:2019:MTP, author = "Runyu Pan and Gabriel Parmer", title = "{MxU}: Towards Predictable, Flexible, and Efficient Memory Access Control for the Secure {IoT}", journal = j-TECS, volume = "18", number = "5s", pages = "103:1--103:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358224", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358224", abstract = "The advanced functionality requirements of modern embedded and Internet of Things (IoT) devices --- from autonomous vehicles, to city and power-grid management --- are driving an ever-increasing software complexity. 
At the same time, the pervasive internet connections of these systems necessitate the fundamental design of security into these devices. The isolation of complex features from those that are critical through protection domains is an effective means to constrain the scope of faults and security breaches. Common hardware-provided memory facilities to enforce protection domains through memory access control --- including Memory Management Units (MMUs) usually found in microprocessors, and Memory Protection Units (MPUs) usually found in microcontrollers --- must meet the goals of enabling flexible, efficient and dynamic management of memory, and must enable tight bounds on the worst-case execution of critical code. Unfortunately, current system memory management facilities are ill-prepared to handle this challenge: MMUs that use extensive caches to achieve strong average-case performance suffer from debilitating worst-case and even average-case behavior under hefty interference, while MPUs struggle to provide flexible memory management. This paper details MxU, a memory protection and allocation abstraction that integrates temporal specifications into the memory management subsystem, to enable portable code to achieve both predictable, tightly-bounded execution and dynamic management across both MMU- and MPU-based systems. We implement MxU in the Composite microkernel, and evaluate its flexibility and predictability over two different architectures: a MPU-based Cortex-M7 microcontroller and a MMU-based Cortex-A9 microprocessor using a suite of modern applications including neural network-based inference, SQLite, and a javascript runtime. For MMU-based systems, MxU reduces application TLB stall by up to 68.0\%. For MPU-based systems, MxU enables flexible dynamic memory management often with application overheads of 1\%, increasing to 6.1\% under significant interference.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "103", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Yim:2019:TFS, author = "Keun Soo Yim and Iliyan Malchev and Andrew Hsieh and Dave Burke", title = "{Treble}: Fast Software Updates by Creating an Equilibrium in an Active Software Ecosystem of Globally Distributed Stakeholders", journal = j-TECS, volume = "18", number = "5s", pages = "104:1--104:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358237", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358237", abstract = "This paper presents our experience with Treble, a two-year initiative to build the modular base in Android, a Java-based mobile platform running on the Linux kernel. Our Treble architecture splits the hardware independent core framework written in Java from the hardware dependent vendor implementations (e.g., user space device drivers, vendor native libraries, and kernel written in C/C++). Cross-layer communications between them are done via versioned, stable inter-process communication interfaces whose backward compatibility is tested by using two API compliance suites. Based on this architecture, we repackage the key Android software components that suffered from crucial post-launch security bugs as separate images. That not only enables separate ownerships but also independent updates of each image by interested ecosystem entities. We discuss our experience of delivering Treble architectural changes to silicon vendors and device makers using a yearly release model. 
Our experiments and industry rollouts support our hypothesis that giving more freedom to all ecosystem entities and creating an equilibrium are a transformation necessary to further scale the world's largest open source ecosystem with over two billion active devices.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "104", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Tran:2019:SVC, author = "Hoang-Dung Tran and Feiyang Cai and Diego {Manzanas Lopez} and Patrick Musau and Taylor T. Johnson and Xenofon Koutsoukos", title = "Safety Verification of Cyber-Physical Systems with Reinforcement Learning Control", journal = j-TECS, volume = "18", number = "5s", pages = "105:1--105:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358230", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358230", abstract = "This paper proposes a new forward reachability analysis approach to verify safety of cyber-physical systems (CPS) with reinforcement learning controllers. The foundation of our approach lies on two efficient, exact and over-approximate reachability algorithms for neural network control systems using star sets, which is an efficient representation of polyhedra. Using these algorithms, we determine the initial conditions for which a safety-critical system with a neural network controller is safe by incrementally searching a critical initial condition where the safety of the system cannot be established. Our approach produces tight over-approximation error and it is computationally efficient, which allows the application to practical CPS with learning-enabled components (LECs).
We implement our approach in NNV, a recent verification tool for neural networks and neural network control systems, and evaluate its advantages and applicability by verifying safety of a practical Advanced Emergency Braking System (AEBS) with a reinforcement learning (RL) controller trained using the deep deterministic policy gradient (DDPG) method. The experimental results show that our new reachability algorithms are much less conservative than existing polyhedra-based approaches. We successfully determine the entire region of the initial conditions of the AEBS with the RL controller such that the safety of the system is guaranteed, while a polyhedra-based approach cannot prove the safety properties of the system.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "105", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Huang:2019:RRA, author = "Chao Huang and Jiameng Fan and Wenchao Li and Xin Chen and Qi Zhu", title = "{ReachNN}: Reachability Analysis of Neural-Network Controlled Systems", journal = j-TECS, volume = "18", number = "5s", pages = "106:1--106:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358228", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358228", abstract = "Applying neural networks as controllers in dynamical systems has shown great promises. However, it is critical yet challenging to verify the safety of such control systems with neural-network controllers in the loop. Previous methods for verifying neural network controlled systems are limited to a few specific activation functions. 
In this work, we propose a new reachability analysis approach based on Bernstein polynomials that can verify neural-network controlled systems with a more general form of activation functions, i.e., as long as they ensure that the neural networks are Lipschitz continuous. Specifically, we consider abstracting feedforward neural networks with Bernstein polynomials for a small subset of inputs. To quantify the error introduced by abstraction, we provide both theoretical error bound estimation based on the theory of Bernstein polynomials and more practical sampling based error bound estimation, following a tight Lipschitz constant estimation approach based on forward reachability analysis. Compared with previous methods, our approach addresses a much broader set of neural networks, including heterogeneous neural networks that contain multiple types of activation functions. Experiment results on a variety of benchmarks show the effectiveness of our approach.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "106", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Yaghoubi:2019:WCS, author = "Shakiba Yaghoubi and Georgios Fainekos", title = "Worst-case Satisfaction of {STL} Specifications Using Feedforward Neural Network Controllers: a {Lagrange} Multipliers Approach", journal = j-TECS, volume = "18", number = "5s", pages = "107:1--107:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358239", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358239", abstract = "In this paper, a reinforcement learning approach for designing feedback neural network controllers for nonlinear systems is proposed. 
Given a Signal Temporal Logic (STL) specification which needs to be satisfied by the system over a set of initial conditions, the neural network parameters are tuned in order to maximize the satisfaction of the STL formula. The framework is based on a max-min formulation of the robustness of the STL formula. The maximization is solved through a Lagrange multipliers method, while the minimization corresponds to a falsification problem. We present our results on a vehicle and a quadrotor model and demonstrate that our approach reduces the training time more than 50 percent compared to the baseline approach.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "107", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Oehlert:2019:CIT, author = "Dominic Oehlert and Selma Saidi and Heiko Falk", title = "Code-Inherent Traffic Shaping for Hard Real-Time Systems", journal = j-TECS, volume = "18", number = "5s", pages = "108:1--108:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358215", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358215", abstract = "Modern hard real-time systems evolved from isolated single-core architectures to complex multi-core architectures which are often connected in a distributed manner. With the increasing influence of interconnections in hard real-time systems, the access behavior to shared resources of single tasks or cores becomes a crucial factor for the system's overall worst-case timing properties. Traffic shaping is a powerful technique to decrease contention in a network and deliver guarantees on network streams. 
In this paper we present a novel approach to automatically integrate a traffic shaping behavior into the code of a program for different traffic shaping profiles while being as least invasive as possible. As this approach is solely depending on modifying programs on a code-level, it does not rely on any additional hardware or operating system-based functions. We show how different traffic shaping profiles can be implemented into programs using a greedy heuristic and an evolutionary algorithm, as well as their influences on the modified programs. It is demonstrated that the presented approaches can be used to decrease worst-case execution times in multi-core systems and lower buffer requirements in distributed systems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "108", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Awan:2019:TAM, author = "Muhammad Ali Awan and Konstantinos Bletsas and Pedro F. Souto and Benny Akesson and Eduardo Tovar", title = "Techniques and Analysis for Mixed-criticality Scheduling with Mode-dependent Server Execution Budgets", journal = j-TECS, volume = "18", number = "5s", pages = "109:1--109:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358234", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358234", abstract = "In mixed-criticality systems, tasks of different criticality share system resources, mainly to reduce cost. Cost is further reduced by using adaptive mode-based scheduling arrangements, such as Vestal's model, to improve resource efficiency, while guaranteeing schedulability of critical functionality. To simplify safety certification, servers are often used to provide temporal isolation between tasks. 
In its simplest form, a server is a periodically recurring time window, in which some tasks are scheduled. A server's computational requirements may greatly vary in different modes, although state-of-the-art techniques and schedulability tests do not allow different budgets to be used by a server in different modes. This results in a single conservative execution budget for all modes, increasing system cost. The goal of this paper is to reduce the cost of mixed-criticality systems through three main contributions: (i) a scheduling arrangement for uniprocessor systems employing fixed-priority scheduling within periodic servers, whose budgets are dynamically adjusted at run-time in the event of a mode change, (ii) a new schedulability analysis for such systems, and (iii) heuristic algorithms for assigning budgets to servers in different modes and ordering the execution of the servers. Experiments with synthetic task sets demonstrate considerable improvements (up to 52.8\%) in scheduling success ratio when using dynamic server budgets vs. static ``one-size-fits-all-modes'' budgets.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "109", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{VanPinxten:2019:PSC, author = "Joost {Van Pinxten} and Marc Geilen and Twan Basten", title = "Parametric Scheduler Characterization", journal = j-TECS, volume = "18", number = "5s", pages = "110:1--110:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358226", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:44 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358226", abstract = "Schedulers assign starting times to events in a system such that a set of constraints is met and system productivity is maximized. 
We characterize the scheduler behaviour for the case where decisions are made by comparing affine expressions of design parameters such as task workload, processing speed, robot travelling speed, or a controller's rise and settling time. Deterministic schedulers can be extended with symbolic execution, to keep track of the affine conditions on the parameters for which the scheduling decisions are made. We introduce a divide-and-conquer algorithm that uses this information to determine parameter regions for which the same sequence of decisions is taken given a particular scenario. The results provide designers insight in the impact of parameter changes on the performance of their system. The exploration can also be executed with the KLEE symbolic execution engine of the LLVM tool chain to extract the same results. We show that the divide-and-conquer approach provides the results much faster than the generic symbolic execution engine of KLEE. The results allow visualization of the sensitivity to all parameter combinations. The results of our approach therefore provide more insight in the sensitivity to parameters.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "110", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Shukla:2020:EEC, author = "Sandeep K. Shukla", title = "Editorial: Embedded Computing and Society", journal = j-TECS, volume = "18", number = "6", pages = "1--3", month = jan, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3368250", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 23 06:51:29 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3368250", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "112", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Jain:2020:CHS, author = "Shubham Jain and Anand Raghunathan", title = "{CxDNN}: Hardware-software Compensation Methods for Deep Neural Networks on Resistive Crossbar Systems", journal = j-TECS, volume = "18", number = "6", pages = "1--23", month = jan, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3362035", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 23 06:51:29 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3362035", abstract = "Resistive crossbars have shown strong potential as the building blocks of future neural fabrics, due to their ability to natively execute vector-matrix multiplication (the dominant computational kernel in DNNs). However, a key challenge that arises in \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "113", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Tiku:2020:OSV, author = "Saideep Tiku and Sudeep Pasricha", title = "Overcoming Security Vulnerabilities in Deep Learning-based Indoor Localization Frameworks on Mobile Devices", journal = j-TECS, volume = "18", number = "6", pages = "1--24", month = jan, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3362036", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 23 06:51:29 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3362036", abstract = "Indoor localization is an emerging application domain for the navigation and tracking of people and assets. Ubiquitously available Wi-Fi signals have enabled low-cost fingerprinting-based localization solutions. 
Further, the rapid growth in mobile \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "114", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Tiwari:2020:RRA, author = "Sakshi Tiwari and Shreshth Tuli and Isaar Ahmad and Ayushi Agarwal and Preeti Ranjan Panda and Sreenivas Subramoney", title = "{REAL}: {REquest} Arbitration in Last Level Caches", journal = j-TECS, volume = "18", number = "6", pages = "1--24", month = jan, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3362100", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 23 06:51:29 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3362100", abstract = "Shared last level caches (LLC) of multicore systems-on-chip are subject to a significant amount of contention over a limited bandwidth, resulting in major performance bottlenecks that make the issue a first-order concern in modern multiprocessor systems-\ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "115", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Sood:2020:RDV, author = "Surinder Sood and Avinash Malik and Partha Roop", title = "Robust Design and Validation of Cyber-physical Systems", journal = j-TECS, volume = "18", number = "6", pages = "1--21", month = jan, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3362098", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 23 06:51:29 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3362098", abstract = "Co-simulation--based validation of hardware controllers adjoined with plant models, with continuous dynamics, is an important step in model-based design of controllers for Cyber-physical Systems (CPS). Co-simulation suffers from many problems, such as \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "116", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhou:2020:BBT, author = "Jia Zhou and Prachi Joshi and Haibo Zeng and Renfa Li", title = "{BTMonitor}: Bit-time-based Intrusion Detection and Attacker Identification in Controller Area Network", journal = j-TECS, volume = "18", number = "6", pages = "1--23", month = jan, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3362034", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 23 06:51:29 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3362034", abstract = "With the rapid growth of connectivity and autonomy for today's automobiles, their security vulnerabilities are becoming one of the most urgent concerns in the automotive industry. 
The lack of message authentication in Controller Area Network (CAN), \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "117", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Li:2020:HSC, author = "Mengquan Li and Weichen Liu and Nan Guan and Yiyuan Xie and Yaoyao Ye", title = "Hardware-Software Collaborative Thermal Sensing in Optical Network-on-Chip--based Manycore Systems", journal = j-TECS, volume = "18", number = "6", pages = "1--24", month = jan, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3362099", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 23 06:51:29 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3362099", abstract = "Continuous technology scaling in manycore systems leads to severe overheating issues. To guarantee system reliability, it is critical to accurately yet efficiently monitor runtime temperature distribution for effective chip thermal management. As an \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "118", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Neshatpour:2020:IIC, author = "Katayoun Neshatpour and Houman Homayoun and Avesta Sasan", title = "{ICNN}: The Iterative Convolutional Neural Network", journal = j-TECS, volume = "18", number = "6", pages = "1--27", month = jan, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3355553", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 23 06:51:29 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3355553", abstract = "Modern and recent architectures of vision-based Convolutional Neural Networks (CNN) have improved detection and prediction accuracy significantly. However, these algorithms are extremely computationally intensive. To break the power and performance wall \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "119", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Cedersjo:2020:TFC, author = "Gustav Cedersj{\"o} and J{\"o}rn W. Janneck", title = "{T{\"y}cho}: a Framework for Compiling Stream Programs", journal = j-TECS, volume = "18", number = "6", pages = "1--25", month = jan, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3362692", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 23 06:51:29 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3362692", abstract = "Many application areas for embedded systems, such as DSP, media coding, and image processing, are based on stream processing. 
Stream programs in these areas are often naturally described as graphs, where nodes are computational kernels that send data \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "120", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hammadeh:2020:WHR, author = "Zain A. H. Hammadeh and Sophie Quinton and Rolf Ernst", title = "Weakly-hard Real-time Guarantees for Earliest Deadline First Scheduling of Independent Tasks", journal = j-TECS, volume = "18", number = "6", pages = "1--25", month = jan, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3356865", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 23 06:51:29 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3356865", abstract = "The current trend in modeling and analyzing real-time systems is toward tighter yet safe timing constraints. Many practical real-time systems can de facto sustain a bounded number of deadline-misses, i.e., they have Weakly-Hard Real-Time (WHRT) \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "121", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Krishnakumar:2020:APL, author = "Gnanambikai Krishnakumar and Kommuru Alekhya Reddy and Chester Rebeiro", title = "{ALEXIA}: a Processor with Lightweight Extensions for Memory Safety", journal = j-TECS, volume = "18", number = "6", pages = "1--27", month = jan, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3362064", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 23 06:51:29 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3362064", abstract = "Illegal use of memory pointers is a serious security vulnerability. A large number of malwares exploit the spatial and temporal nature of these vulnerabilities to subvert execution or glean sensitive data from an application. Recent countermeasures \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "122", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Yan:2020:TCH, author = "Kaige Yan and Jingweijia Tan and Longjun Liu and Xingyao Zhang and Stanko R. Brankovic and Jinghong Chen and Xin Fu", title = "Toward Customized Hybrid Fuel-Cell and Battery-powered Mobile Device for Individual Users", journal = j-TECS, volume = "18", number = "6", pages = "1--20", month = jan, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3362033", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 23 06:51:29 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3362033", abstract = "Rapidly evolving technologies and applications of mobile devices inevitably increase the power demands on the battery. 
However, the development of batteries can hardly keep pace with the fast-growing demands, leading to short battery life, which becomes \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "123", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Han:2020:BAP, author = "Jian-Jun Han and Sunlu Gong and Zhenjiang Wang and Wen Cai and Dakai Zhu and Laurence T. Yang", title = "Blocking-Aware Partitioned Real-Time Scheduling for Uniform Heterogeneous Multicore Platforms", journal = j-TECS, volume = "19", number = "1", pages = "1:1--1:25", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3366683", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Feb 15 07:25:13 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3366683", abstract = "Heterogeneous multicore processors have recently become de facto computing engines for state-of-the-art embedded applications. Nonetheless, very little research focuses on the scheduling of periodic (implicit-deadline) real-time tasks upon heterogeneous \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "1", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Marco:2020:ODL, author = "Vicent Sanz Marco and Ben Taylor and Zheng Wang and Yehia Elkhatib", title = "Optimizing Deep Learning Inference on Embedded Systems Through Adaptive Model Selection", journal = j-TECS, volume = "19", number = "1", pages = "2:1--2:28", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3371154", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Feb 15 07:25:13 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3371154", abstract = "Deep neural networks (DNNs) are becoming a key enabling technique for many application domains. However, on-device inference on battery-powered, resource-constrained embedding systems is often infeasible due to prohibitively long inferencing time and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "2", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Azari:2020:ETO, author = "Elham Azari and Sarma Vrudhula", title = "{ELSA}: a Throughput-Optimized Design of an {LSTM} Accelerator for Energy-Constrained Devices", journal = j-TECS, volume = "19", number = "1", pages = "3:1--3:21", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3366634", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Feb 15 07:25:13 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3366634", abstract = "The next significant step in the evolution and proliferation of artificial intelligence technology will be the integration of neural network (NN) models within embedded and mobile systems. 
This calls for the design of compact, energy efficient NN models \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "3", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Jayakodi:2020:DOE, author = "Nitthilan Kanappan Jayakodi and Syrine Belakaria and Aryan Deshwal and Janardhan Rao Doppa", title = "Design and Optimization of Energy-Accuracy Tradeoff Networks for Mobile Platforms via Pretrained Deep Models", journal = j-TECS, volume = "19", number = "1", pages = "4:1--4:24", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3366636", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Feb 15 07:25:13 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3366636", abstract = "Many real-world edge applications including object detection, robotics, and smart health are enabled by deploying deep neural networks (DNNs) on energy-constrained mobile platforms. In this article, we propose a novel approach to trade off energy and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "4", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Romaszkan:2020:PPP, author = "Wojciech Romaszkan and Tianmu Li and Puneet Gupta", title = "{3PXNet}: Pruned-Permuted-Packed {XNOR} Networks for Edge Machine Learning", journal = j-TECS, volume = "19", number = "1", pages = "5:1--5:23", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3371157", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Feb 15 07:25:13 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3371157", abstract = "As the adoption of Neural Networks continues to proliferate different classes of applications and systems, edge devices have been left behind. Their strict energy and storage limitations make them unable to cope with the sizes of common network models. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "5", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lang:2020:DIE, author = "Clemens Lang and Isabella Stilkerich", title = "Design and Implementation of an Escape Analysis in the Context of Safety-Critical Embedded Systems", journal = j-TECS, volume = "19", number = "1", pages = "6:1--6:20", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3372133", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Feb 15 07:25:13 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3372133", abstract = "The use of a managed, type-safe language such as Standard ML, Ada Ravenscar, or Java in hard real-time and embedded systems offers productivity, safety, and dependability benefits at a reasonable cost. 
Static software systems, that is systems in which \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "6", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{He:2020:BCL, author = "Wenjian He and Sanjeev Das and Wei Zhang and Yang Liu", title = "{BBB-CFI}: Lightweight {CFI} Approach Against Code-Reuse Attacks Using Basic Block Information", journal = j-TECS, volume = "19", number = "1", pages = "7:1--7:22", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3371151", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Feb 15 07:25:13 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3371151", abstract = "Code-reuse attack is a concrete threat to computing systems because it can evade conventional security defenses. Control flow integrity (CFI) is proposed to repel this threat. However, former implementations of CFI suffer from two major drawbacks: \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "7", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lizarraga:2020:AMB, author = "Adrian Lizarraga and Jonathan Sprinkle and Roman Lysecky", title = "Automated Model-Based Optimization of Data-Adaptable Embedded Systems", journal = j-TECS, volume = "19", number = "1", pages = "8:1--8:22", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3372142", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Feb 15 07:25:13 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3372142", abstract = "Dynamic data-driven applications such as object tracking, surveillance, and other sensing and decision applications are largely dependent on the characteristics of the data streams on which they operate. The underlying models and algorithms of data-\ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "8", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ghosh:2020:PGI, author = "Sumana Ghosh and Soumyajit Dey and Pallab Dasgupta", title = "Pattern Guided Integrated Scheduling and Routing in Multi-Hop Control Networks", journal = j-TECS, volume = "19", number = "2", pages = "9:1--9:28", month = mar, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3372134", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Mar 18 07:47:52 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3372134", abstract = "Executing a set of control loops over a shared multi-hop (wireless) control network (MCN) requires careful co-scheduling of the control tasks and the routing of sensory/actuation messages over the MCN.
In this work, we establish pattern guided aperiodic \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "9", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chen:2020:QEO, author = "Fupeng Chen and Heng Yu and Yajun Ha", title = "Quality Estimation and Optimization of Adaptive Stereo Matching Algorithms for Smart Vehicles", journal = j-TECS, volume = "19", number = "2", pages = "10:1--10:24", month = mar, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3372784", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Mar 18 07:47:52 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3372784", abstract = "Stereo matching is a promising approach for smart vehicles to find the depth of nearby objects. Transforming a traditional stereo matching algorithm to its adaptive version has potential advantages to achieve the maximum quality (depth accuracy) in a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "10", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Nejatollahi:2020:SFA, author = "Hamid Nejatollahi and Felipe Valencia and Subhadeep Banik and Francesco Regazzoni and Rosario Cammarota and Nikil Dutt", title = "Synthesis of Flexible Accelerators for Early Adoption of Ring-{LWE} Post-quantum Cryptography", journal = j-TECS, volume = "19", number = "2", pages = "11:1--11:17", month = mar, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3378164", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Mar 18 07:47:52 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3378164", abstract = "The advent of the quantum computer makes current public-key infrastructure insecure. Cryptography community is addressing this problem by designing, efficiently implementing, and evaluating novel public-key algorithms capable of withstanding quantum \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "11", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Navarro:2020:MLM, author = "Osvaldo Navarro and Jones Yudi and Javier Hoffmann and Hector Gerardo Mu{\~n}oz Hernandez and Michael H{\"u}bner", title = "A Machine Learning Methodology for Cache Memory Design Based on Dynamic Instructions", journal = j-TECS, volume = "19", number = "2", pages = "12:1--12:20", month = mar, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3376920", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Mar 18 07:47:52 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3376920", abstract = "Cache memories are an essential component of modern processors and consume a large percentage of their power consumption. Its efficacy depends heavily on the memory demands of the software. Thus, finding the optimal cache for a particular program is not \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "12", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kindt:2020:EMB, author = "Philipp H. Kindt and Daniel Yunge and Robert Diemer and Samarjit Chakraborty", title = "Energy Modeling for the {Bluetooth Low Energy} Protocol", journal = j-TECS, volume = "19", number = "2", pages = "13:1--13:32", month = mar, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3379339", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Mar 18 07:47:52 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3379339", abstract = "Bluetooth Low Energy (BLE) is a wireless protocol optimized for low-power communication.
To design energy-efficient devices, the protocol provides a number of parameters that need to be optimized within an energy, latency, and throughput design space. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "13", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Luppold:2020:CWC, author = "Arno Luppold and Dominic Oehlert and Heiko Falk", title = "Compiling for the Worst Case: Memory Allocation for Multi-task and Multi-core Hard Real-time Systems", journal = j-TECS, volume = "19", number = "2", pages = "14:1--14:26", month = mar, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3381752", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Mar 18 07:47:52 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3381752", abstract = "Modern embedded hard real-time systems feature multiple tasks running on multiple processing cores. Schedulability analysis of such systems is usually performed on an abstract system level with each task being represented as a black box with fixed \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "14", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ahmad:2020:FFB, author = "Afzal Ahmad and Muhammad Adeel Pasha", title = "{FFConv}: an {FPGA}-based Accelerator for Fast Convolution Layers in Convolutional Neural Networks", journal = j-TECS, volume = "19", number = "2", pages = "15:1--15:24", month = mar, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3380548", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Mar 18 07:47:52 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3380548", abstract = "Image classification is known to be one of the most challenging problems in the domain of computer vision. Significant research is being done on developing systems and algorithms improving accuracy, performance, area, and power consumption for related \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "15", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Shukla:2020:TER, author = "Sandeep K. Shukla", title = "{TECS} Editorial: Rethinking and Re-evaluating in the Time of Crisis", journal = j-TECS, volume = "19", number = "3", pages = "16e:1--16e:3", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3395923", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jul 8 17:07:32 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3395923", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "16e", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ganapathy:2020:DDV, author = "Sanjay Ganapathy and Swagath Venkataramani and Giridhur Sriraman and Balaraman Ravindran and Anand Raghunathan", title = "{DyVEDeep}: Dynamic Variable Effort Deep Neural Networks", journal = j-TECS, volume = "19", number = "3", pages = "16:1--16:24", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3372882", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jul 8 17:07:32 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3372882", abstract = "Deep Neural Networks (DNNs) have advanced the state-of-the-art in a variety of machine learning tasks and are deployed in increasing numbers of products and services. However, the computational requirements of training and evaluating large-scale DNNs \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "16", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Smeets:2020:ARS, author = "Hugues Smeets and Matteo Ceriotti and Pedro Jos{\'e} Marr{\'o}n", title = "Adapting Recursive Sinusoidal Software Oscillators for Low-power Fixed-point Processors", journal = j-TECS, volume = "19", number = "3", pages = "17:1--17:26", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3378559", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jul 8 17:07:32 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3378559", abstract = "The growing field of the Internet of Things relies at the bottom on components with very scarce computing resources that currently do not allow complex processing of sensed data. 
Any computation involving Fast Fourier Transforms (FFT), Wavelet \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "17", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Cheng:2020:DDT, author = "Yuan Cheng and Guangya Li and Ngai Wong and Hai-Bao Chen and Hao Yu", title = "{DEEPEYE}: a Deeply Tensor-Compressed Neural Network for Video Comprehension on Terminal Devices", journal = j-TECS, volume = "19", number = "3", pages = "18:1--18:25", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3381805", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jul 8 17:07:32 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3381805", abstract = "Video object detection and action recognition typically require deep neural networks (DNNs) with huge number of parameters. It is thereby challenging to develop a DNN video comprehension unit in resource-constrained terminal devices. In this article, we \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "18", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Aerabi:2020:DSE, author = "Ehsan Aerabi and Milad Bohlouli and Mohammad Hasan Ahmadi Livany and Mahdi Fazeli and Athanasios Papadimitriou and David Hely", title = "Design Space Exploration for Ultra-Low-Energy and Secure {IoT MCUs}", journal = j-TECS, volume = "19", number = "3", pages = "19:1--19:34", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3384446", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jul 8 17:07:32 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3384446", abstract = "This article explores the design space of secure communication in ultra-low-energy IoT devices based on Micro-Controller Units (MCUs). It tries to identify, benchmark, and compare security-related design choices in a Commercial-Off-The-Shelf (COTS) \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "19", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Seo:2020:MMP, author = "Hwajeong Seo and Kyuhwang An and Hyeokdong Kwon and Zhi Hu", title = "{Montgomery} Multiplication for Public Key Cryptography on {MSP430X}", journal = j-TECS, volume = "19", number = "3", pages = "20:1--20:15", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3387919", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jul 8 17:07:32 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3387919", abstract = "For traditional public key cryptography and post-quantum cryptography, such as elliptic curve cryptography and supersingular isogeny key encapsulation, modular multiplication is the most performance-critical operation among basic arithmetic of these cryptographic schemes. For this reason, the execution timing of such cryptographic schemes, which may highly determine the service availability for low-end microprocessors (e.g., 8-bit AVR, 16-bit MSP430X, and 32-bit ARM Cortex-M), mainly relies on the efficiency of modular multiplication on target embedded processors. In this article, we present new optimal modular multiplication techniques based on the interleaved Montgomery multiplication on 16-bit MSP430X microprocessors, where the multiplication part is performed in a hardware multiplier and the reduction part is performed in a basic arithmetic logic unit (ALU) with the optimal modular multiplication routine, respectively. This two-step approach is effective for the special modulus of NIST curves, SM2 curves, and supersingular isogeny key encapsulation. We further optimized the Montgomery reduction by using techniques for Montgomery-friendly prime. This technique significantly reduces the number of partial products. 
To demonstrate the superiority of the proposed implementation of Montgomery multiplication, we applied the proposed method to the NIST P-256 curve, of which the implementation improves the previous modular multiplication operation by 23.6\% on 16-bit MSP430X microprocessors and to the SM2 curve as well (first implementation on 16-bit MSP430X microcontrollers). Moreover, secure countermeasures against timing attack and simple power analysis are also applied to the scalar multiplication of NIST P-256 and SM2 curves, which achieve the 8,582,338 clock cycles (0.53 seconds at 16 MHz) and 10,027,086 clock cycles (0.62 seconds at 16 MHz), respectively. The proposed Montgomery multiplication is a generic method that can be applied to other cryptographic schemes and microprocessors with minor modifications.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "20", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ghosh:2020:RSD, author = "Saurav Kumar Ghosh and Jaffer Sheriff R. C. and Vibhor Jain and Soumyajit Dey", title = "Reliable and Secure Design-Space-Exploration for Cyber-Physical Systems", journal = j-TECS, volume = "19", number = "3", pages = "21:1--21:29", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3387927", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jul 8 17:07:32 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3387927", abstract = "Given the widespread deployment of cyber-physical systems and their safety-critical nature, reliability and security guarantees offered by such systems are of paramount importance. While the security of such systems against sensor attacks have garnered \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "21", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhao:2020:NLD, author = "Zhuoran Zhao and Kamyar Mirzazad Barijough and Andreas Gerstlauer", title = "Network-level Design Space Exploration of Resource-constrained Networks-of-Systems", journal = j-TECS, volume = "19", number = "4", pages = "22:1--22:26", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3387918", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sun Jul 19 08:50:15 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3387918", abstract = "Driven by recent advances in networking and computing technologies, distributed application scenarios are increasingly deployed on resource-constrained processing platforms. This includes networked embedded and cyber-physical systems as well as edge \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "22", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kadiyala:2020:LLA, author = "Sai Praveen Kadiyala and Manaar Alam and Yash Shrivastava and Sikhar Patranabis and Muhamed Fauzi Bin Abbas and Arnab Kumar Biswas and Debdeep Mukhopadhyay and Thambipillai Srikanthan", title = "{LAMBDA: Lightweight Assessment of Malware for emBeddeD} Architectures", journal = j-TECS, volume = "19", number = "4", pages = "23:1--23:31", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3390855", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sun Jul 19 08:50:15 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3390855", abstract = "Security is a critical aspect in many of the latest embedded and IoT systems. 
Malware is one of the severe threats of security for such devices. There have been enormous efforts in malware detection and analysis; however, occurrences of newer varieties \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "23", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Das:2020:ALS, author = "Tuhin Subhra Das and Prasun Ghosal and Navonil Chatterjee and Arnab Nath and Akash Banerjee and Subhojyoti Khastagir", title = "Application of Logical Sub-networking in Congestion-aware Deadlock-free {SDmesh} Routing", journal = j-TECS, volume = "19", number = "4", pages = "24:1--24:26", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3387928", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sun Jul 19 08:50:15 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3387928", abstract = "An adaptive routing helps in evading early network saturation by steering data packets through the less congested area at the oppressive loaded situation. However, performances of adaptive routing are not always promising under all circumstances. Say \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "24", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chuang:2020:DDB, author = "Yi-Jing Chuang and Shuo-Han Chen and Yuan-Hao Chang and Yu-Pei Liang and Hsin-Wen Wei and Wei-Kuan Shih", title = "{DSTL}: a Demand-Based Shingled Translation Layer for Enabling Adaptive Address Mapping on {SMR} Drives", journal = j-TECS, volume = "19", number = "4", pages = "25:1--25:21", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3391892", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sun Jul 19 08:50:15 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3391892", abstract = "Shingled magnetic recording (SMR) is regarded as a promising technology for resolving the areal density limitation of conventional magnetic recording hard disk drives. Among different types of SMR drives, drive-managed SMR (DM-SMR) requires no changes \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "25", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Behrouzian:2020:FAR, author = "Amir Behrouzian and Hadi Alizadeh Ara and Marc Geilen and Dip Goswami and Twan Basten", title = "Firmness Analysis of Real-time Tasks", journal = j-TECS, volume = "19", number = "4", pages = "26:1--26:24", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3398328", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sun Jul 19 08:50:15 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3398328", abstract = "$ (m, k) $-firm real-time tasks require meeting the deadline of at least $m$ jobs out of any $k$ consecutive jobs.
When compared to hard real-time tasks, $ (m, k) $-firm tasks open up the possibility of tighter resource-dimensioning in implementations. Firmness \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "26", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Liu:2020:AML, author = "Ke Liu and Mengying Zhao and Lei Ju and Zhiping Jia and Jingtong Hu and Chun Jason Xue", title = "Applying Multiple Level Cell to Non-volatile {FPGAs}", journal = j-TECS, volume = "19", number = "4", pages = "27:1--27:22", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3400885", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sun Jul 19 08:50:15 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3400885", abstract = "Static random access memory- (SRAM) based field programmable gate arrays (FPGAs) are currently facing challenges of limited capacity and high leakage power. To solve this problem, non-volatile memory (NVM) is proposed as the alternative to build non-\ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "27", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Sheikh:2020:EER, author = "Saad Zia Sheikh and Muhammad Adeel Pasha", title = "Energy-efficient Real-time Scheduling on Multicores: a Novel Approach to Model Cache Contention", journal = j-TECS, volume = "19", number = "4", pages = "28:1--28:25", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3399413", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sun Jul 19 08:50:15 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3399413", abstract = "With the increasing demand for higher performance, the adoption of multicores has been a major stepping stone in the evolution of hard real-time systems. Though the computational bandwidth is increased due to parallel processing, the indispensable \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "28", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hu:2020:GBT, author = "Junyan Hu and Kenli Li and Chubo Liu and Keqin Li", title = "Game-Based Task Offloading of Multiple Mobile Devices with {QoS} in Mobile Edge Computing Systems of Limited Computation Capacity", journal = j-TECS, volume = "19", number = "4", pages = "29:1--29:21", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3398038", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sun Jul 19 08:50:15 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3398038", abstract = "Mobile edge computing (MEC) is becoming a promising paradigm of providing computing servers, like cloud computing, to Edge node. 
Compared to cloud servers, MECs are deployed closer to mobile devices (MDs) and can provide high quality-of-service (QoS \ldots{}).", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "29", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Saha:2021:MWR, author = "Debasri Saha and Susmita Sur-Kolay", title = "Minimization of {WCRT} with Recovery Assurance from Hardware {Trojans} for Tasks on {FPGA}-based Cloud", journal = j-TECS, volume = "20", number = "1", pages = "1:1--1:25", month = jan, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3409479", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Jan 16 06:52:20 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3409479", abstract = "Dynamic partial reconfiguration (DPR) enabled FPGA-based Cloud architecture acts as a flexible and efficient shared environment to facilitates application support to users' request at low cost. While on one hand we need to handle a variety of tasks, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "1", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Srinivasavarma:2021:TBC, author = "Vegesna S. M. 
Srinivasavarma and Shiv Vidhyut and Noor Mahammad S.", title = "A {TCAM}-based Caching Architecture Framework for Packet Classification", journal = j-TECS, volume = "20", number = "1", pages = "2:1--2:19", month = jan, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3409109", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Jan 16 06:52:20 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3409109", abstract = "Packet Classification is the enabling function for performing many networking applications like Integrated Services, Differentiated Services, Access Control/Firewalls, and Intrusion Detection. To cope with high-speed links and ever-increasing bandwidth \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "2", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Pazzaglia:2021:GWH, author = "Paolo Pazzaglia and Youcheng Sun and Marco {Di Natale}", title = "Generalized Weakly Hard Schedulability Analysis for Real-Time Periodic Tasks", journal = j-TECS, volume = "20", number = "1", pages = "3:1--3:26", month = jan, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3404888", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Jan 16 06:52:20 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3404888", abstract = "The weakly hard real-time model is an abstraction for applications, including control systems, that can tolerate occasional deadline misses, but can also be compromised if a sufficiently high number of late terminations occur in a given time window. The \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "3", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Paul:2021:ATA, author = "Suraj Paul and Navonil Chatterjee and Prasun Ghosal and Jean-Philippe Diguet", title = "Adaptive Task Allocation and Scheduling on {NoC}-based Multicore Platforms with Multitasking Processors", journal = j-TECS, volume = "20", number = "1", pages = "4:1--4:26", month = jan, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3408324", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Jan 16 06:52:20 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3408324", abstract = "The application workloads in modern multicore platforms are becoming increasingly dynamic. It becomes challenging when multiple applications need to be executed in parallel in such systems. Mapping and scheduling of these applications are critical for \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "4", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Agarwal:2021:IPH, author = "Sukarn Agarwal and Hemangee K. Kapoor", title = "Improving the Performance of Hybrid Caches Using Partitioned Victim Caching", journal = j-TECS, volume = "20", number = "1", pages = "5:1--5:27", month = jan, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3411368", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Jan 16 06:52:20 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3411368", abstract = "Non-Volatile Memory technologies are coming as a viable option on account of the high density and low-leakage power over the conventional SRAM counterpart. 
However, the increased write latency reduces their chances as a substitute for SRAM. To attenuate \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "5", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{He:2021:GCF, author = "Jiaji He and Haocheng Ma and Yanjiang Liu and Yiqiang Zhao", title = "Golden Chip-Free {Trojan} Detection Leveraging {Trojan Trigger}'s Side-Channel Fingerprinting", journal = j-TECS, volume = "20", number = "1", pages = "6:1--6:18", month = jan, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3419105", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Jan 16 06:52:20 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3419105", abstract = "Hardware Trojans (HTs) have become a major threat for the integrated circuit industry and supply chain and have motivated numerous developments of HT detection schemes. Although the side-channel HT detection approach is among the most promising \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "6", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ivanov:2021:VSA, author = "Radoslav Ivanov and Taylor J. Carpenter and James Weimer and Rajeev Alur and George J. 
Pappas and Insup Lee", title = "Verifying the Safety of Autonomous Systems with Neural Network Controllers", journal = j-TECS, volume = "20", number = "1", pages = "7:1--7:26", month = jan, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3419742", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Jan 16 06:52:20 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3419742", abstract = "This article addresses the problem of verifying the safety of autonomous systems with neural network (NN) controllers. We focus on NNs with sigmoid/tanh activations and use the fact that the sigmoid/tanh is the solution to a quadratic differential \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "7", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ibrahim:2021:MFU, author = "Omar Adel Ibrahim and Savio Sciancalepore and Gabriele Oligeri and Roberto {Di Pietro}", title = "{MAGNETO}: Fingerprinting {USB} Flash Drives via Unintentional Magnetic Emissions", journal = j-TECS, volume = "20", number = "1", pages = "8:1--8:26", month = jan, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3422308", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Jan 16 06:52:20 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3422308", abstract = "Universal Serial Bus (USB) Flash Drives are nowadays one of the most convenient and diffused means to transfer files, especially when no Internet connection is available. However, USB flash drives are also one of the most common attack vectors used to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "8", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Fard:2021:APP, author = "Mahdi Mohammadpour Fard and Mahmood Hasanloo and Mehdi Kargahi", title = "Analytical Program Power Characterization for Battery Depletion-time Estimation", journal = j-TECS, volume = "20", number = "2", pages = "9:1--9:9", month = mar, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3421511", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 20 17:37:34 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3421511", abstract = "Appropriate battery selection is a major design decision regarding the fast growth of battery-operated devices like space rovers, wireless sensor network nodes, rescue robots, and so on. Many such systems are mission critical, where estimation of the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "9", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ungureanu:2021:FAT, author = "George Ungureanu and Jos{\'e} Edil {Guimar{\~a}es De Medeiros} and Timmy Sundstr{\"o}m and Ingemar S{\"o}derquist and Anders {\AA}hlander and Ingo Sander", title = "{ForSyDe-Atom}: Taming Complexity in Cyber Physical System Design with Layers", journal = j-TECS, volume = "20", number = "2", pages = "10:1--10:27", month = mar, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3424667", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 20 17:37:34 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3424667", abstract = "We present ForSyDe-Atom, a formal framework intended as an entry point for disciplined design of complex cyber-physical systems. 
This framework provides a set of rules for combining several domain-specific languages as structured, enclosing layers to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "10", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Li:2021:HCO, author = "Keqin Li", title = "Heuristic Computation Offloading Algorithms for Mobile Users in Fog Computing", journal = j-TECS, volume = "20", number = "2", pages = "11:1--11:28", month = mar, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3426852", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 20 17:37:34 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3426852", abstract = "The investigation in this article makes the following important contributions to combinatorial optimization of computation offloading in fog computing. First, we rigorously define the two problems of optimal computation offloading with energy constraint \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "11", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Dharmaraj:2021:OSP, author = "Celia Dharmaraj and Vinita Vasudevan and Nitin Chandrachoodan", title = "Optimization of Signal Processing Applications Using Parameterized Error Models for Approximate Adders", journal = j-TECS, volume = "20", number = "2", pages = "12:1--12:25", month = mar, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3430509", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 20 17:37:34 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3430509", abstract = "Approximate circuit design has gained significance in recent years targeting error-tolerant applications. In the literature, there have been several attempts at optimizing the number of approximate bits of each approximate adder in a system for a given \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "12", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Atoofian:2021:REG, author = "Ehsan Atoofian and Zayan Shaikh and Ali Jannesari", title = "Reducing Energy in {GPGPUs} through Approximate Trivial Bypassing", journal = j-TECS, volume = "20", number = "2", pages = "13:1--13:27", month = mar, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3429440", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 20 17:37:34 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3429440", abstract = "General-purpose computing using graphics processing units (GPGPUs) is an attractive option for acceleration of applications with massively data-parallel tasks. 
While performance of modern GPGPUs is increasing rapidly, the power consumption of these \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "13", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Carreon:2021:PET, author = "Nadir A. Carreon and Sixing Lu and Roman Lysecky", title = "Probabilistic Estimation of Threat Intrusion in Embedded Systems for Runtime Detection", journal = j-TECS, volume = "20", number = "2", pages = "14:1--14:27", month = mar, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3432590", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 20 17:37:34 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3432590", abstract = "With billions of networked connected embedded systems, the security historically provided by the isolation of embedded systems is no longer sufficient. Millions of new malware are created every month and zero-day attacks are becoming an increasing \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "14", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Akbari:2021:FHA, author = "Ali Akbari and Jonathan Martinez and Roozbeh Jafari", title = "Facilitating Human Activity Data Annotation via Context-Aware Change Detection on Smartwatches", journal = j-TECS, volume = "20", number = "2", pages = "15:1--15:20", month = mar, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3431503", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 20 17:37:34 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3431503", abstract = "Annotating activities of daily living (ADL) is vital for developing machine learning models for activity recognition. In addition, it is critical for self-reporting purposes such as in assisted living where the users are asked to log their ADLs. However, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "15", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ko:2021:LCL, author = "Yousun Ko and Alex Chadwick and Daniel Bates and Robert Mullins", title = "Lane Compression: a Lightweight Lossless Compression Method for Machine Learning on Embedded Systems", journal = j-TECS, volume = "20", number = "2", pages = "16:1--16:26", month = mar, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3431815", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 20 17:37:34 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/datacompression.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3431815", abstract = "This article presents Lane Compression, a lightweight lossless compression technique for machine learning that is based on a detailed study of the statistical properties of machine learning data. The proposed technique profiles machine learning data \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "16", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Sepulveda:2021:BCA, author = "Johanna Sep{\'u}lveda and Mathieu Gross and Andreas Zankl and Georg Sigl", title = "Beyond Cache Attacks: Exploiting the Bus-based Communication Structure for Powerful On-Chip Microarchitectural Attacks", journal = j-TECS, volume = "20", number = "2", pages = "17:1--17:23", month = mar, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3433653", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 20 17:37:34 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3433653", abstract = "System-on-Chips (SoCs) are a key enabling technology for the Internet-of-Things (IoT), a hyper-connected world where on- and inter-chip communication is ubiquitous. SoCs usually integrate cryptographic hardware cores for confidentiality and authentication services. However, these components are prone to implementation attacks. During the operation of a cryptographic core, the secret key may passively be inferred through cache observations. Access-driven attacks exploiting these observations are therefore a vital threat to SoCs operating in IoT environments. Previous works have shown the feasibility of these attacks in the SoC context. Yet, the SoC communication structure can be used to further improve access-based cache attacks. The communication attacks are not as well-understood as other micro-architectural attacks. It is important to raise the awareness of SoC designers of such a threat. To this end, we present four contributions. First, we demonstrate an improved Prime+Probe attack on four different AES-128 implementations (original transformation tables, T0-Only, T2KB, and S-Box). 
As a novelty, this attack exploits the collisions of the
                 bus-based SoC communication to further increase its
                 efficiency. Second, we explore the impact of
                 preloading on the efficiency of our
                 communication-optimized attack. Third, we integrate
                 three countermeasures (shuffling, mini-tables, and
                 Time-Division Multiple Access (TDMA) bus arbitration)
                 and evaluate their impact on the attack. Although
                 shuffling and mini-tables countermeasures were
                 proposed in previous work, their application as
                 countermeasures against the bus-based attack was not
                 studied before. In addition, TDMA as a countermeasure
                 for bus-based attacks is an original contribution of
                 this work. Fourth, we further discuss the implications
                 of our work in the SoC design and its perspective with
                 the new cryptographic primitives proposed in the
                 ongoing National Institute of Standard and Technology
                 Lightweight Cryptography competition. The results show
                 that our improved communication-optimized attack is
                 efficient, speeding up full key recovery by up to 400
                 times when compared to the traditional Prime+Probe
                 technique. Moreover, the protection techniques are
                 feasible and effectively mitigate the proposed
                 improved attack.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "17",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Mitra:2021:ERA,
  author =       "Tulika Mitra",
  title =        "Editorial: Reimagining {{\booktitle{ACM Transactions
                 on Embedded Computing Systems (TECS)}}}",
  journal =      j-TECS,
  volume =       "20",
  number =       "3",
  pages =        "18e:1--18e:3",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3450438",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Apr 24 07:51:05 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3450438",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "18e",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Langerman:2021:RTH,
  author =       "David Langerman and Alan George",
  title =        "Real-time, High-resolution Depth Upsampling on
                 Embedded Accelerators",
  journal =      j-TECS,
  volume =       "20",
  number =       "3",
  pages =        "18:1--18:22",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3436878",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Apr 24 07:51:05 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3436878",
  abstract =     "High-resolution, low-latency apps in computer vision
                 are ubiquitous in today's world of mixed-reality
                 devices. These innovations provide a platform that can
                 leverage the improving technology of depth sensors and
                 embedded accelerators to enable higher-\ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "18",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Seo:2021:SBA,
  author =       "Hwajeong Seo and Pakize Sanal and Reza Azarderakhsh",
  title =        "{SIKE} in 32-bit {ARM} Processors Based on Redundant
                 Number System for {NIST} Level-{II}",
  journal =      j-TECS,
  volume =       "20",
  number =       "3",
  pages =        "19:1--19:23",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3439733",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Apr 24 07:51:05 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/fparith.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3439733",
  abstract =     "We present an optimized implementation of the
                 post-quantum Supersingular Isogeny Key Encapsulation
                 (SIKE) for 32-bit ARMv7-A processors supporting NEON
                 engine (i.e., SIMD instruction).
Unlike previous SIKE implementations, finite field arithmetic is
                 efficiently implemented in a redundant representation,
                 which avoids carry propagation and pipeline stall.
                 Furthermore, we adopted several state-of-the-art
                 engineering techniques as well as hand-crafted
                 assembly implementation for high performance.
                 Optimized implementations are ported to Microsoft SIKE
                 library written in ``a non-redundant representation''
                 and evaluated in high-end 32-bit ARMv7-A processors,
                 such as ARM Cortex-A5, A7, and A15. A full
                 key-exchange execution of SIKEp503 is performed in
                 about 109 million cycles on ARM Cortex-A15 processors
                 (i.e., 54.5 ms @2.0 GHz), which is about
                 $ 1.58 \times $ faster than previous state-of-the-art
                 work presented in CHES 18.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "19",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ma:2021:CSA,
  author =       "Mingze Ma and Rizos Sakellariou",
  title =        "Code-size-aware Scheduling of Synchronous Dataflow
                 Graphs on Multicore Systems",
  journal =      j-TECS,
  volume =       "20",
  number =       "3",
  pages =        "20:1--20:24",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3440034",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Apr 24 07:51:05 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3440034",
  abstract =     "Synchronous dataflow graphs are widely used to model
                 digital signal processing and multimedia applications.
                 Self-timed execution is an efficient methodology for
                 the analysis and scheduling of synchronous dataflow
                 graphs. In this article, we propose a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "20",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Yuan:2021:CCB,
  author =       "Bo Yuan and Xiaofen Lu and Ke Tang and Xin Yao",
  title =        "Cooperative Coevolution-based Design Space Exploration
                 for Multi-mode Dataflow Mapping",
  journal =      j-TECS,
  volume =       "20",
  number =       "3",
  pages =        "21:1--21:25",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3440246",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Apr 24 07:51:05 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3440246",
  abstract =     "Some signal processing and multimedia applications can
                 be specified by synchronous dataflow (SDF) models. The
                 problem of SDF mapping to a given set of heterogeneous
                 processors has been known to be NP-hard and widely
                 studied in the design automation \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "21",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

%%% Myriad2 is a product name: it is brace-protected in the title so
%%% that sentence-casing bibliography styles do not print ``myriad2''.
@Article{Leon:2021:IPP,
  author =       "Vasileios Leon and George Lentaris and Evangelos
                 Petrongonas and Dimitrios Soudris and Gianluca Furano
                 and Antonis Tavoularis and David Moloney",
  title =        "Improving Performance-Power-Programmability in Space
                 Avionics with Edge Devices: {VBN} on {Myriad2} {SoC}",
  journal =      j-TECS,
  volume =       "20",
  number =       "3",
  pages =        "22:1--22:23",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3440885",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Apr 24 07:51:05 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3440885",
  abstract =     "The advent of powerful edge devices and AI algorithms
                 has already revolutionized many terrestrial
                 applications; however, for both technical and
                 historical reasons, the space industry is still
                 striving to adopt these key enabling technologies in
                 new \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "22",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Shamsa:2021:UUB,
  author =       "Elham Shamsa and Alma Pr{\"o}bstl and Nima
                 TaheriNejad and Anil Kanduri and Samarjit Chakraborty
                 and Amir M.
Rahmani and Pasi Liljeberg",
  title =        "{UBAR}: User- and Battery-aware Resource Management
                 for Smartphones",
  journal =      j-TECS,
  volume =       "20",
  number =       "3",
  pages =        "23:1--23:25",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3441644",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Apr 24 07:51:05 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3441644",
  abstract =     "Smartphone users require high Battery Cycle Life (BCL)
                 and high Quality of Experience (QoE) during their
                 usage. These two objectives can be conflicting based
                 on the user preference at run-time. Finding the best
                 trade-off between QoE and BCL requires an \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "23",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Rottleuthner:2021:SYP,
  author =       "Michel Rottleuthner and Thomas C. Schmidt and Matthias
                 W{\"a}hlisch",
  title =        "Sense Your Power: The {ECO} Approach to Energy
                 Awareness for {IoT} Devices",
  journal =      j-TECS,
  volume =       "20",
  number =       "3",
  pages =        "24:1--24:25",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3441643",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Apr 24 07:51:05 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3441643",
  abstract =     "Energy-constrained sensor nodes can adaptively
                 optimize their energy consumption if a continuous
                 measurement is provided. This is of particular
                 importance in scenarios of high dynamics such as with
                 energy harvesting. Still, self-measuring of power
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "24",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Marshall:2021:PCP,
  author =       "James Marshall and Robert Gifford and Gedare Bloom and
                 Gabriel Parmer and Rahul Simha",
  title =        "Precise Cache Profiling for Studying Radiation
                 Effects",
  journal =      j-TECS,
  volume =       "20",
  number =       "3",
  pages =        "25:1--25:25",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3442339",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Apr 24 07:51:05 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3442339",
  abstract =     "Increased access to space has led to an increase in
                 the usage of commodity processors in radiation
                 environments. These processors are vulnerable to
                 transient faults such as single event upsets that may
                 cause bit-flips in processor components. Caches in
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "25",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Poudel:2021:MFU,
  author =       "Prawar Poudel and Biswajit Ray and Aleksandar
                 Milenkovic",
  title =        "Microcontroller Fingerprinting Using Partially Erased
                 {NOR} Flash Memory Cells",
  journal =      j-TECS,
  volume =       "20",
  number =       "3",
  pages =        "26:1--26:23",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3448271",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Apr 24 07:51:05 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3448271",
  abstract =     "Electronic device fingerprints, unique bit vectors
                 extracted from device's physical properties, are used
                 to differentiate between instances of functionally
                 identical devices.
This article introduces a new technique that extracts
                 fingerprints from unique \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "26",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

%%% The second author's family name is entered with the lowercase
%%% particle ``von'' so that BibTeX parses it as the von part and
%%% Hanxleden as the last name.
@Article{Girault:2021:ISI,
  author =       "Alain Girault and Reinhard von Hanxleden",
  title =        "Introduction to the Special Issue on {Specification
                 and Design Languages (FDL 2019)}",
  journal =      j-TECS,
  volume =       "20",
  number =       "4",
  pages =        "27:1--27:3",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3458748",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sun Jun 6 07:03:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3458748",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "27",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Shi:2021:TGH,
  author =       "Zhendong Shi and Haocheng Ma and Qizhi Zhang and
                 Yanjiang Liu and Yiqiang Zhao and Jiaji He",
  title =        "Test Generation for Hardware {Trojan} Detection Using
                 Correlation Analysis and Genetic Algorithm",
  journal =      j-TECS,
  volume =       "20",
  number =       "4",
  pages =        "28:1--28:20",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3446837",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sun Jun 6 07:03:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3446837",
  abstract =     "Hardware Trojan (HT) is a major threat to the security
                 of integrated circuits (ICs). Among various HT
                 detection approaches, side channel analysis
                 (SCA)-based methods have been extensively studied.
                 SCA-based methods try to detect HTs by comparing side
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed.
Comput. Syst.",
  articleno =    "28",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Jackson:2021:EES,
  author =       "Riley Jackson and Jonathan Gresl and Ramon Lawrence",
  title =        "Efficient External Sorting for Memory-Constrained
                 Embedded Devices with Flash Memory",
  journal =      j-TECS,
  volume =       "20",
  number =       "4",
  pages =        "29:1--29:21",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3446976",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sun Jun 6 07:03:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3446976",
  abstract =     "Embedded devices are ubiquitous in areas of industrial
                 and environmental monitoring, health and safety, and
                 consumer appliances. A common use case is data
                 collection, processing, and performing actions based
                 on data analysis. Although many Internet of \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "29",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Rahman:2021:LTW,
  author =       "Mahbubur Rahman and Dali Ismail and Venkata P.
                 Modekurthy and Abusayeed Saifullah",
  title =        "{LPWAN} in the {TV} White Spaces: a Practical
                 Implementation and Deployment Experiences",
  journal =      j-TECS,
  volume =       "20",
  number =       "4",
  pages =        "30:1--30:26",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3447877",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sun Jun 6 07:03:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3447877",
  abstract =     "Low-Power Wide-Area Network (LPWAN) is an enabling
                 Internet-of-Things technology that supports
                 long-range, low-power, and low-cost connectivity to
                 numerous devices.
To avoid the crowd in the limited ISM band (where most LPWANs
                 operate) and cost of \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "30",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Bombieri:2021:SIS,
  author =       "Nicola Bombieri and Silvia Scaffeo and Antonio
                 Mastrandrea and Simone Caligola and Tommaso Carlucci
                 and Franco Fummi and Carlo Laudanna and Gabriela
                 Constantin and Rosalba Giugno",
  title =        "{SystemC} Implementation of Stochastic {Petri} Nets
                 for Simulation and Parameterization of Biological
                 Networks",
  journal =      j-TECS,
  volume =       "20",
  number =       "4",
  pages =        "31:1--31:20",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3427091",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sun Jun 6 07:03:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3427091",
  abstract =     "Model development and simulation of biological
                 networks is recognized as a key task in Systems
                 Biology. Integrated with in vitro and in vivo
                 experimental data, network simulation allows for the
                 discovery of the dynamics that regulate biological
                 systems. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "31",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Gressl:2021:DSE,
  author =       "Lukas Gressl and Christian Steger and Ulrich Neffe",
  title =        "Design Space Exploration for Secure {IoT} Devices and
                 Cyber-Physical Systems",
  journal =      j-TECS,
  volume =       "20",
  number =       "4",
  pages =        "32:1--32:24",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3430372",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sun Jun 6 07:03:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3430372",
  abstract =     "With the advent of the Internet of Things (IoT) and
                 Cyber-Physical Systems (CPS), embedded devices have
                 been gaining importance in our daily lives, as well as
                 industrial processes. Independent of their usage, be
                 it within an IoT system or a CPS, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "32",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Bruns:2021:TMC,
  author =       "Friederike Bruns and Irune Yarza and Philipp
                 Ittershagen and Kim Gr{\"u}ttner",
  title =        "Time Measurement and Control Blocks for Bare-Metal
                 {C++} Applications",
  journal =      j-TECS,
  volume =       "20",
  number =       "4",
  pages =        "34:1--34:26",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3434401",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sun Jun 6 07:03:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3434401",
  abstract =     "Precisely timed execution of resource constrained
                 bare-metal applications is difficult, because the
                 embedded software developer usually has to implement
                 and check the timeliness of the executed application
                 through manual interaction with timers or \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "34",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Dupont:2021:EBH,
  author =       "Guillaume Dupont and Yamine Ait-Ameur and Neeraj Kumar
                 Singh and Marc Pantel",
  title =        "{Event-B} Hybridation: a Proof and Refinement-based
                 Framework for Modelling Hybrid Systems",
  journal =      j-TECS,
  volume =       "20",
  number =       "4",
  pages =        "35:1--35:37",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3448270",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sun Jun 6 07:03:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3448270",
  abstract =     "Hybrid systems are complex systems where a software
                 controller interacts with a physical environment,
                 usually named a plant, through sensors and actuators.
The specification and design of such systems usually rely on the
                 description of both continuous \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "35",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Schulz-Rosengarten:2021:TOO,
  author =       "Alexander Schulz-Rosengarten and Steven Smyth and
                 Michael Mendler",
  title =        "Toward Object-oriented Modeling in {SCCharts}",
  journal =      j-TECS,
  volume =       "20",
  number =       "4",
  pages =        "37:1--37:26",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3453482",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sun Jun 6 07:03:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3453482",
  abstract =     "Object orientation is a powerful and widely used
                 paradigm for abstraction and structuring in
                 programming. Many languages are designed with this
                 principle or support different degrees of object
                 orientation. In synchronous languages, originally
                 developed \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "37",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Broman:2021:IPM,
  author =       "David Broman",
  title =        "Interactive Programmatic Modeling",
  journal =      j-TECS,
  volume =       "20",
  number =       "4",
  pages =        "33:1--33:26",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3431387",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sun Jun 6 07:03:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3431387",
  abstract =     "Modeling and computational analyses are fundamental
                 activities within science and engineering.
Analysis activities can take various forms, such as simulation of
                 executable models, formal verification of model
                 properties, or inference of hidden model \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "33",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Lohstroh:2021:TLF,
  author =       "Marten Lohstroh and Christian Menard and Soroush
                 Bateni and Edward A. Lee",
  title =        "Toward a Lingua Franca for Deterministic Concurrent
                 Systems",
  journal =      j-TECS,
  volume =       "20",
  number =       "4",
  pages =        "36:1--36:27",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3448128",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sun Jun 6 07:03:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3448128",
  abstract =     "Many programming languages and programming frameworks
                 focus on parallel and distributed computing. Several
                 frameworks are based on actors, which provide a more
                 disciplined model for concurrency than threads. The
                 interactions between actors, however, if \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "36",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Shrivastava:2020:ISIa,
  author =       "Aviral Shrivastava and Jian-Jia Chen and Youtao
                 Zhang",
  title =        "Introduction to the Special Issue on Languages,
                 Compilers, Tools, and Theory of Embedded Systems:
                 {Part 1}",
  journal =      j-TECS,
  volume =       "19",
  number =       "5",
  pages =        "30:1--30:3",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3417732",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 10 13:34:59 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3417732",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "30",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chang:2020:DAR,
  author =       "Wanli Chang and Ran Wei and Shuai Zhao and Andy
                 Wellings and Jim Woodcock and Alan Burns",
  title =        "Development Automation of Real-Time {Java}:
                 Model-Driven Transformation and Synthesis",
  journal =      j-TECS,
  volume =       "19",
  number =       "5",
  pages =        "31:1--31:26",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3391897",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 10 13:34:59 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/java2020.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3391897",
  abstract =     "Many applications in emerging scenarios, such as
                 autonomous vehicles, intelligent robots, and
                 industrial automation, are safety-critical with strict
                 timing requirements. However, the development of
                 real-time systems is error prone and highly dependent
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "31",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Venkataramani:2020:SSD,
  author =       "Vanchinathan Venkataramani and Aditi Kulkarni and
                 Tulika Mitra and Li-Shiuan Peh",
  title =        "{SPECTRUM}: a Software-defined Predictable Many-core
                 Architecture for {LTE\slash 5G} Baseband Processing",
  journal =      j-TECS,
  volume =       "19",
  number =       "5",
  pages =        "32:1--32:28",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3400032",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 10 13:34:59 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3400032",
  abstract =     "Wireless communication standards such as Long-term
                 Evolution (LTE) are rapidly changing to support the
                 high data-rate of wireless devices. The physical layer
                 baseband processing has strict real-time deadlines,
                 especially in the next-generation \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "32",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Reghenzani:2020:DUP,
  author =       "Federico Reghenzani and Luca Santinelli and William
                 Fornaciari",
  title =        "Dealing with Uncertainty in {pWCET} Estimations",
  journal =      j-TECS,
  volume =       "19",
  number =       "5",
  pages =        "33:1--33:23",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3396234",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 10 13:34:59 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3396234",
  abstract =     "The problem of estimating a tight and safe Worst-Case
                 Execution Time (WCET), needed for certification in
                 safety-critical environment, is a challenging problem
                 for modern embedded systems.
A possible solution proposed in past years is to exploit
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "33",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Calderon:2020:GUE,
  author =       "Alejandro J. Calder{\'o}n and Leonidas Kosmidis and
                 Carlos F. Nicol{\'a}s and Francisco J. Cazorla and
                 Peio Onaindia",
  title =        "{GMAI}: Understanding and Exploiting the Internals of
                 {GPU} Resource Allocation in Critical Systems",
  journal =      j-TECS,
  volume =       "19",
  number =       "5",
  pages =        "34:1--34:23",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3391896",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 10 13:34:59 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3391896",
  abstract =     "Critical real-time systems require strict resource
                 provisioning in terms of memory and timing. The
                 constant need for higher performance in these systems
                 has led industry to recently include GPUs. However,
                 GPU software ecosystems are by their nature \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "34",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wang:2020:CTC,
  author =       "Chundong Wang and Sudipta Chattopadhyay and Gunavaran
                 Brihadiswarn",
  title =        "{Crab-tree}: a Crash Recoverable {B+}-tree Variant for
                 Persistent Memory with {ARMv8} Architecture",
  journal =      j-TECS,
  volume =       "19",
  number =       "5",
  pages =        "35:1--35:26",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3396236",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 10 13:34:59 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3396236",
  abstract =     "In recent years, the next-generation non-volatile
                 memory (NVM) technologies have emerged with DRAM-like
                 byte addressability and disk-like durability. Computer
                 architects have proposed to use them to build
                 persistent memory that blurs the conventional
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "35",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Bresch:2020:TXP,
  author =       "Cyril Bresch and David H{\'e}ly and Roman Lysecky and
                 St{\'e}phanie Chollet and Ioannis Parissis",
  title =        "{TrustFlow-X}: a Practical Framework for Fine-grained
                 Control-flow Integrity in Critical Systems",
  journal =      j-TECS,
  volume =       "19",
  number =       "5",
  pages =        "36:1--36:26",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3398327",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 10 13:34:59 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3398327",
  abstract =     "This article addresses the challenges of memory safety
                 in life-critical medical devices.
Since the last decade, healthcare manufacturers have embraced the
                 Internet of Things, pushing technological innovations
                 to increase market share. Medical devices, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "36",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Lukyanov:2020:FVS,
  author =       "Georgy Lukyanov and Andrey Mokhov and Jakob Lechner",
  title =        "Formal Verification of Spacecraft Control Programs",
  journal =      j-TECS,
  volume =       "19",
  number =       "5",
  pages =        "37:1--37:18",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3391900",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 10 13:34:59 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3391900",
  abstract =     "Verification of correctness of control programs is an
                 essential task in the development of space
                 electronics; it is difficult and typically outweighs
                 design and programming tasks in terms of development
                 hours. This article presents a verification \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "37",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Kadiyala:2020:HPC,
  author =       "Sai Praveen Kadiyala and Pranav Jadhav and Siew-Kei
                 Lam and Thambipillai Srikanthan",
  title =        "Hardware Performance Counter-Based Fine-Grained
                 Malware Detection",
  journal =      j-TECS,
  volume =       "19",
  number =       "5",
  pages =        "38:1--38:17",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3403943",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 10 13:34:59 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3403943",
  abstract =     "Detection of malicious programs using hardware-based
                 features has gained prominence recently. The
                 tamper-resistant hardware metrics prove to be a better
                 security feature than the high-level software metrics,
                 which can be easily obfuscated. Hardware \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "38",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Stitt:2020:PAI,
  author =       "Greg Stitt and David Campbell",
  title =        "{PANDORA}: an Architecture-Independent Parallelizing
                 Approximation-Discovery Framework",
  journal =      j-TECS,
  volume =       "19",
  number =       "5",
  pages =        "39:1--39:17",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3391899",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 10 13:34:59 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3391899",
  abstract =     "In this article, we introduce a parallelizing
                 approximation-discovery framework, PANDORA, for
                 automatically discovering application- and
                 architecture-specialized approximations of provided
                 code.
PANDORA complements existing compilers and runtime \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "39", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Atoofian:2020:ACG, author = "Ehsan Atoofian", title = "Approximate Cache in {GPGPUs}", journal = j-TECS, volume = "19", number = "5", pages = "40:1--40:22", month = nov, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3407904", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Aug 10 13:34:59 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3407904", abstract = "There is a growing number of application domains ranging from multimedia to machine learning where a certain level of inexactness can be tolerated. For these applications, approximate computing is an effective technique that trades off some loss in \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "40", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Shrivastava:2020:ISIb, author = "Aviral Shrivastava and Jian-Jia Chen and Youtao Zhang", title = "Introduction to the Special Issue on Languages, Compilers, Tools, and Theory of Embedded Systems: {Part 2}", journal = j-TECS, volume = "19", number = "6", pages = "41:1--41:2", month = nov, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3417734", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Dec 10 11:17:18 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3417734", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "41", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hsiao:2020:CHC, author = "Luke Hsiao and Sen Wu and Nicholas Chiang and Christopher R{\'e} and Philip Levis", title = "Creating Hardware Component Knowledge Bases with Training Data Generation and Multi-task Learning", journal = j-TECS, volume = "19", number = "6", pages = "42:1--42:26", month = nov, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3391906", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Dec 10 11:17:18 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3391906", abstract = "Hardware component databases are vital resources in designing embedded systems. Since creating these databases requires hundreds of thousands of hours of manual data entry, they are proprietary, limited in the data they provide, and have random data \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "42", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Samragh:2020:ERB, author = "Mohammad Samragh and Mojan Javaheripi and Farinaz Koushanfar", title = "{EncoDeep}: Realizing Bit-flexible Encoding for Deep Neural Networks", journal = j-TECS, volume = "19", number = "6", pages = "43:1--43:29", month = nov, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3391901", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Dec 10 11:17:18 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3391901", abstract = "This article proposes EncoDeep, an end-to-end framework that facilitates encoding, bitwidth customization, fine-tuning, and implementation of neural networks on FPGA platforms. 
EncoDeep incorporates nonlinear encoding to the computation flow of neural \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "43", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Khan:2020:OTC, author = "Asif Ali Khan and Norman A. Rink and Fazal Hameed and Jeronimo Castrillon", title = "Optimizing Tensor Contractions for Embedded Devices with Racetrack and {DRAM} Memories", journal = j-TECS, volume = "19", number = "6", pages = "44:1--44:26", month = nov, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3396235", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Dec 10 11:17:18 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3396235", abstract = "Tensor contraction is a fundamental operation in many algorithms with a plethora of applications ranging from quantum chemistry over fluid dynamics and image processing to machine learning. The performance of tensor computations critically depends on \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "44", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ahmed:2020:FEE, author = "Saad Ahmed and Naveed Anwar Bhatti and Muhammad Hamad Alizai and Junaid Haroon Siddiqui and Luca Mottola", title = "Fast and Energy-Efficient State Checkpointing for Intermittent Computing", journal = j-TECS, volume = "19", number = "6", pages = "45:1--45:27", month = nov, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3391903", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Dec 10 11:17:18 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3391903", abstract = "Intermittently powered embedded devices ensure forward progress of programs through state checkpointing in non-volatile memory. Checkpointing is, however, expensive in energy and adds to the execution times. To minimize this overhead, we present DICE, a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "45", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Li:2020:DIA, author = "Xinyi Li and Lei Zhang and Xipeng Shen", title = "{DIAC}: an Inter-app Conflicts Detector for Open {IoT} Systems", journal = j-TECS, volume = "19", number = "6", pages = "46:1--46:25", month = nov, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3391895", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Dec 10 11:17:18 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3391895", abstract = "This article tackles the problem of detecting and solving potential conflicts among independently developed apps that are to be installed into an open Internet-of-Things (IoT) environment. 
It provides a new set of definitions and categorizations of the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "46", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ahmed:2020:DEC, author = "Saad Ahmed and Muhammad Nawaz and Abu Bakar and Naveed Anwar Bhatti and Muhammad Hamad Alizai and Junaid Haroon Siddiqui and Luca Mottola", title = "Demystifying Energy Consumption Dynamics in Transiently powered Computers", journal = j-TECS, volume = "19", number = "6", pages = "47:1--47:25", month = nov, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3391893", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Dec 10 11:17:18 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3391893", abstract = "Transiently powered computers (TPCs) form the foundation of the battery-less Internet of Things, using energy harvesting and small capacitors to power their operation. This kind of power supply is characterized by extreme variations in supply voltage, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "47", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wade:2020:EIP, author = "April W. Wade and Prasad A. Kulkarni and Michael R. 
Jantz", title = "Exploring Impact of Profile Data on Code Quality in the {HotSpot JVM}", journal = j-TECS, volume = "19", number = "6", pages = "48:1--48:26", month = nov, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3391894", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Dec 10 11:17:18 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2020.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3391894", abstract = "Managed language virtual machines (VM) rely on dynamic or just-in-time (JIT) compilation to generate optimized native code at run-time to deliver high execution performance. Many VMs and JIT compilers collect profile data at run-time to enable profile-\ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "48", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Reissmann:2020:RIR, author = "Nico Reissmann and Jan Christian Meyer and Helge Bahmann and Magnus Sj{\"a}lander", title = "{RVSDG}: an Intermediate Representation for Optimizing Compilers", journal = j-TECS, volume = "19", number = "6", pages = "49:1--49:28", month = nov, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3391902", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Dec 10 11:17:18 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3391902", abstract = "Intermediate Representations (IRs) are central to optimizing compilers as the way the program is represented may enhance or limit analyses and transformations. Suitable IRs focus on exposing the most relevant information and establish invariants that \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "49", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Latifis:2020:RMC, author = "Ioannis Latifis and Karthick Parashar and Grigoris Dimitroulakos and Hans Cappelle and Christakis Lezos and Konstantinos Masselos and Francky Catthoor", title = "A Retargetable {MATLAB-to-C} Compiler Exploiting Custom Instructions and Data Parallelism", journal = j-TECS, volume = "19", number = "6", pages = "50:1--50:27", month = nov, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3391898", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Dec 10 11:17:18 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/matlab.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3391898", abstract = "This article presents a MATLAB-to-C compiler that exploits custom instructions present in state-of-the-art processor architectures and supports semi-automatic vectorization. A parameterized processor model is used to describe the target instruction set architecture to achieve user-friendly retargetability. Custom instructions are represented via specialized intrinsic functions in the generated code, which can then be used as input to any C/C++ compiler supporting the target processor. In addition, the compiler supports the generation of data parallel\slash vectorized code through the introduction of data packing\slash unpacking statements. The compiler has been used for code generation targeting ARM and x86 architectures for several benchmarks. The vectorized code generated by the compiler achieves an average speedup of 4.1 $ \times $ and 2.7 $ \times $ for packed fixed and floating point data, respectively, compared to scalarized code for ARM architecture and an average speedup of 3.1 $ \times $ and 1.5 $ \times $ for packed fixed and floating point data, respectively, for x86 architecture. 
Implementing data parallel instructions directly in the assembly code would have required a lot of design effort, and it would not have been sustainable across evolving platform variants. Thus, the compiler can be employed to efficiently speed up critical sections of the target application. The compiler is therefore potentially employable to raise the design abstraction and reduce development time for both embedded and general-purpose applications.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "50", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Susu:2020:VLA, author = "Alexandru E. Susu", title = "A Vector-Length Agnostic Compiler for the {Connex-S} Accelerator with Scratchpad Memory", journal = j-TECS, volume = "19", number = "6", pages = "51:1--51:30", month = nov, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3406536", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Dec 10 11:17:18 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3406536", abstract = "Compiling sequential C programs for Connex-S, a competitive, scalable and customizable, wide vector accelerator for intensive embedded applications with 32 to 4,096 16-bit integer lanes and a limited capacity local scratchpad memory, is challenging. Our \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "51", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lee:2021:D, author = "Edward A.
Lee", title = "Determinism", journal = j-TECS, volume = "20", number = "5", pages = "38:1--38:34", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3453652", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Aug 10 13:35:00 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3453652", abstract = "This article is about deterministic models, what they are, why they are useful, and what their limitations are. First, the article emphasizes that determinism is a property of models, not of physical systems. Whether a model is deterministic or not \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "38", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Leon:2021:IPD, author = "Vasileios Leon and Theodora Paparouni and Evangelos Petrongonas and Dimitrios Soudris and Kiamal Pekmestzi", title = "Improving Power of {DSP} and {CNN} Hardware Accelerators Using Approximate Floating-point Multipliers", journal = j-TECS, volume = "20", number = "5", pages = "39:1--39:21", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3448980", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Aug 10 13:35:00 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3448980", abstract = "Approximate computing has emerged as a promising design alternative for delivering power-efficient systems and circuits by exploiting the inherent error resiliency of numerous applications. The current article aims to tackle the increased hardware cost of floating-point multiplication units, which prohibits their usage in embedded computing. 
We introduce AFMU (Approximate Floating-point MUltiplier), an area/power-efficient family of multipliers, which apply two approximation techniques in the resource-hungry mantissa multiplication and can be seamlessly extended to support dynamic configuration of the approximation levels via gating signals. AFMU offers large accuracy configuration margins, provides negligible logic overhead for dynamic configuration, and detects unexpected results that may arise due to the approximations. Our evaluation shows that AFMU delivers energy gains in the range 3.6\%--53.5\% for half-precision and 37.2\%--82.4\% for single-precision, in exchange for mean relative error around 0.05\%--3.33\% and 0.01\%--2.20\%, respectively. In comparison with state-of-the-art multipliers, AFMU exhibits up to 4--6 $ \times $ smaller error on average while delivering more energy-efficient computing. The evaluation in image processing shows that AFMU provides sufficient quality of service, i.e., more than 50db PSNR and near 1 SSIM values, and up to 57.4\% power reduction. When used in floating-point CNNs, the accuracy loss is small (or zero), i.e., up to 5.4\% for MNIST and CIFAR-10, in exchange for up to 63.8\% power gain.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "39", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Garcia:2021:IHG, author = "Andr{\'e}s Amaya Garc{\'\i}a and David May and Ed Nutting", title = "Integrated Hardware Garbage Collection", journal = j-TECS, volume = "20", number = "5", pages = "40:1--40:25", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3450147", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Aug 10 13:35:00 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/csharp.bib; https://www.math.utah.edu/pub/tex/bib/python.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3450147", abstract = "Garbage collected programming languages, such as Python and C\#, have accelerated software development. These modern languages increase productivity and software reliability as they provide high-level data representation and control structures. Modern languages are widely used in software development for mobile, desktop, and server devices, but their adoption is limited in real-time embedded systems.\par There is clear interest in supporting modern languages in embedded devices as emerging markets, like the Internet of Things, demand ever smarter and more reliable products. Multiple commercial and open-source projects, such as Zerynth and MicroPython, are attempting to provide support. But these projects rely on software garbage collectors that impose high overheads and introduce unpredictable pauses, preventing their use in many embedded applications. These limitations arise from the unsuitability of conventional processors for performing efficient, predictable garbage collection.\par We propose the Integrated Hardware Garbage Collector (IHGC); a garbage collector tightly coupled with the processor that runs continuously in the background. 
Further, we introduce a static analysis technique to guarantee that real-time programs are never paused by the collector. Our design allocates a memory cycle to the collector when the processor is not using the memory. The IHGC achieves this by careful division of collection work into single-memory-access steps that are interleaved with the processor's memory accesses. As a result, our collector eliminates run-time overheads and enables real-time program analysis.\par The principles behind the IHGC can be used in conjunction with existing architectures. For example, we simulated the IHGC alongside the ARMv6-M architecture. Compared to a conventional processor, our experiments indicate that the IHGC offers 1.5--7 times better performance for programs that rely on garbage collection. The IHGC delivers the benefits of garbage-collected languages with real-time performance but without the complexity and overheads inherent in software collectors.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "40", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhou:2021:RAS, author = "Yuanbin Zhou and Soheil Samii and Petru Eles and Zebo Peng", title = "Reliability-aware Scheduling and Routing for Messages in Time-sensitive Networking", journal = j-TECS, volume = "20", number = "5", pages = "41:1--41:24", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3458768", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Aug 10 13:35:00 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3458768", abstract = "Time-sensitive Networking (TSN) on Ethernet is a promising communication technology in the automotive and industrial automation industries due to its real-time and high-bandwidth communication capabilities. 
Time-triggered scheduling and static routing \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "41", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Valente:2021:CMS, author = "Giacomo Valente and Tiziana Fanni and Carlo Sau and Tania {Di Mascio} and Luigi Pomante and Francesca Palumbo", title = "A Composable Monitoring System for Heterogeneous Embedded Platforms", journal = j-TECS, volume = "20", number = "5", pages = "42:1--42:34", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3461647", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Aug 10 13:35:00 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3461647", abstract = "Advanced computations on embedded devices are nowadays a must in any application field. Often, to cope with such a need, embedded systems designers leverage on complex heterogeneous reconfigurable platforms that offer high performance, thanks to the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "42", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Akdur:2021:SGI, author = "Deniz Akdur", title = "Skills Gaps in the Industry: Opinions of Embedded Software Practitioners", journal = j-TECS, volume = "20", number = "5", pages = "43:1--43:39", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3463340", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Aug 10 13:35:00 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3463340", abstract = "Many practitioners in the software-intensive embedded industry often face difficulties after beginning their careers due to misalignment of the skills learned at the university with what is required in the workplace. Companies spend crucial resources to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "43", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Aligholipour:2021:TTA, author = "Rashid Aligholipour and Mohammad Baharloo and Behnam Farzaneh and Meisam Abdollahi and Ahmad Khonsari", title = "{TAMA}: Turn-aware Mapping and Architecture --- a Power-efficient Network-on-Chip Approach", journal = j-TECS, volume = "20", number = "5", pages = "44:1--44:24", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3462700", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Aug 10 13:35:00 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3462700", abstract = "Nowadays, static power consumption in chip multiprocessor (CMP) is the most crucial concern of chip designers. Power-gating is an effective approach to mitigate static power consumption particularly in low utilization. 
Network-on-Chip (NoC) as the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "44", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Roy:2021:SQL, author = "Sanjit Kumar Roy and Rajesh Devaraj and Arnab Sarkar and Debabrata Senapati", title = "{SLAQA}: Quality-level Aware Scheduling of Task Graphs on Heterogeneous Distributed Systems", journal = j-TECS, volume = "20", number = "5", pages = "45:1--45:31", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3462776", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Aug 10 13:35:00 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3462776", abstract = "Continuous demands for higher performance and reliability within stringent resource budgets is driving a shift from homogeneous to heterogeneous processing platforms for the implementation of today's cyber-physical systems (CPSs). These CPSs are \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "45", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Modekurthy:2021:DRT, author = "Venkata P. 
Modekurthy and Abusayeed Saifullah and Sanjay Madria", title = "A Distributed Real-time Scheduling System for Industrial Wireless Networks", journal = j-TECS, volume = "20", number = "5", pages = "46:1--46:28", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3464429", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Aug 10 13:35:00 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3464429", abstract = "The concept of Industry 4.0 introduces the unification of industrial Internet-of-Things (IoT), cyber physical systems, and data-driven business modeling to improve production efficiency of the factories. To ensure high production efficiency, Industry \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "46", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Forsberg:2021:PEM, author = "Bj{\"o}rn Forsberg and Marco Solieri and Marko Bertogna and Luca Benini and Andrea Marongiu", title = "The Predictable Execution Model in Practice: Compiling Real Applications for {COTS} Hardware", journal = j-TECS, volume = "20", number = "5", pages = "47:1--47:25", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3465370", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Aug 10 13:35:00 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3465370", abstract = "Adoption of multi- and many-core processors in real-time systems has so far been slowed down, if not totally barred, due to the difficulty in providing analytical real-time guarantees on worst-case execution times. The Predictable Execution Model (PREM) \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "47", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Maity:2021:SSO, author = "Biswadip Maity and Bryan Donyanavard and Anmol Surhonne and Amir Rahmani and Andreas Herkersdorf and Nikil Dutt", title = "{SEAMS}: Self-Optimizing Runtime Manager for Approximate Memory Hierarchies", journal = j-TECS, volume = "20", number = "5", pages = "48:1--48:26", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3466875", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Aug 10 13:35:00 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3466875", abstract = "Memory approximation techniques are commonly limited in scope, targeting individual levels of the memory hierarchy. Existing approximation techniques for a full memory hierarchy determine optimal configurations at design-time provided a goal and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "48", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Witterauf:2021:SLC, author = "Michael Witterauf and Dominik Walter and Frank Hannig and J{\"u}rgen Teich", title = "Symbolic Loop Compilation for Tightly Coupled Processor Arrays", journal = j-TECS, volume = "20", number = "5", pages = "49:1--49:31", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3466897", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Aug 10 13:35:00 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3466897", abstract = "Tightly Coupled Processor Arrays (TCPAs), a class of massively parallel loop accelerators, allow applications to offload computationally expensive loops for improved performance and energy efficiency. To achieve these two goals, executing a loop on a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "49", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Bur:2021:WCE, author = "M{\'a}rton B{\'u}r and Krist{\'o}f Marussy and Brett H. Meyer and D{\'a}niel Varr{\'o}", title = "Worst-case Execution Time Calculation for Query-based Monitors by Witness Generation", journal = j-TECS, volume = "20", number = "6", pages = "107:1--107:36", month = nov, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3471904", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Dec 10 11:17:19 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3471904", abstract = "Runtime monitoring plays a key role in the assurance of modern intelligent cyber-physical systems, which are frequently data-intensive and safety-critical. 
While graph queries can serve as an expressive yet formally precise specification language to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "107", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Park:2021:IML, author = "Jurn-Gyu Park and Nikil Dutt and Sung-Soo Lim", title = "An Interpretable Machine Learning Model Enhanced Integrated {CPU--GPU DVFS} Governor", journal = j-TECS, volume = "20", number = "6", pages = "108:1--108:28", month = nov, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3470974", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Dec 10 11:17:19 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3470974", abstract = "Modern heterogeneous CPU-GPU-based mobile architectures, which execute intensive mobile gaming/graphics applications, use software governors to achieve high performance with energy-efficiency. However, existing governors typically utilize simple \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "108", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ray:2021:HAS, author = "Kaustabha Ray and Ansuman Banerjee", title = "Horizontal Auto-Scaling for Multi-Access Edge Computing Using Safe Reinforcement Learning", journal = j-TECS, volume = "20", number = "6", pages = "109:1--109:33", month = nov, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3475991", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Dec 10 11:17:19 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3475991", abstract = "Multi-Access Edge Computing (MEC) has emerged as a promising new paradigm allowing low latency access to services deployed on edge servers to avert network latencies often encountered in accessing cloud services. A key component of the MEC environment is \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.",
  articleno =    "109",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Aydin:2021:HSC,
  author =       "Furkan Aydin and Aydin Aysu and Mohit Tiwari and Andreas
                 Gerstlauer and Michael Orshansky",
  title =        "Horizontal Side-Channel Vulnerabilities of Post-Quantum
                 Key Exchange and Encapsulation Protocols",
  journal =      j-TECS,
  volume =       "20",
  number =       "6",
  pages =        "110:1--110:22",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3476799",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Dec 10 11:17:19 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3476799",
  abstract =     "Key exchange protocols and key encapsulation mechanisms
                 establish secret keys to communicate digital information
                 confidentially over public channels. Lattice-based
                 cryptography variants of these protocols are promising
                 alternatives given their quantum- \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "110",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Giraldo:2021:HAE,
  author =       "J. S. P.
Giraldo and Marian Verhelst",
  title =        "Hardware Acceleration for Embedded Keyword Spotting:
                 Tutorial and Survey",
  journal =      j-TECS,
  volume =       "20",
  number =       "6",
  pages =        "111:1--111:25",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3474365",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Dec 10 11:17:19 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3474365",
  abstract =     "In recent years, Keyword Spotting (KWS) has become a
                 crucial human-machine interface for mobile devices,
                 allowing users to interact more naturally with their
                 gadgets by leveraging their own voice. Due to privacy,
                 latency and energy requirements, the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "111",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{RibeiroDaSilva:2021:MCH,
  author =       "Junio Cezar {Ribeiro Da Silva} and Lorena Le{\~a}o and
                 Vinicius Petrucci and Abdoulaye Gamati{\'e} and Fernando
                 Magno {Quint{\~a}o Pereira}",
  title =        "Mapping Computations in Heterogeneous Multicore Systems
                 with Statistical Regression on Program Inputs",
  journal =      j-TECS,
  volume =       "20",
  number =       "6",
  pages =        "112:1--112:35",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3478288",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Dec 10 11:17:19 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3478288",
  abstract =     "A hardware configuration is a set of processors and
                 their frequency levels in a multicore heterogeneous
                 system. This article presents a compiler-based technique
                 to match functions with hardware configurations. Such a
                 technique consists of using \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "112",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wang:2021:VSH,
  author =       "Yu Wang and Nima Roohi and Matthew West and Mahesh
                 Viswanathan and Geir E. Dullerud",
  title =        "Verifying Stochastic Hybrid Systems with Temporal Logic
                 Specifications via Model Reduction",
  journal =      j-TECS,
  volume =       "20",
  number =       "6",
  pages =        "113:1--113:27",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3483380",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Dec 10 11:17:19 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3483380",
  abstract =     "We present a scalable methodology to verify stochastic
                 hybrid systems for inequality linear temporal logic
                 (iLTL) or inequality metric interval temporal logic
                 (iMITL). Using the Mori--Zwanzig reduction method, we
                 construct a finite-state Markov chain \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "113",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Servais:2021:ACR,
  author =       "Jason Servais and Ehsan Atoofian",
  title =        "Adaptive Computation Reuse for Energy-Efficient Training
                 of Deep Neural Networks",
  journal =      j-TECS,
  volume =       "20",
  number =       "6",
  pages =        "114:1--114:24",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3487025",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Dec 10 11:17:19 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3487025",
  abstract =     "In recent years, Deep Neural Networks (DNNs) have been
                 deployed into a diverse set of applications from voice
                 recognition to scene generation mostly due to their
                 high-accuracy.
DNNs are known to be computationally intensive applications,
                 requiring a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "114",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Saini:2021:IFC,
  author =       "Kanika Saini and Sheetal Kalra and Sandeep K. Sood",
  title =        "{IoT}-Fog-Cloud Centric Earthquake Monitoring and
                 Prediction",
  journal =      j-TECS,
  volume =       "20",
  number =       "6",
  pages =        "115:1--115:26",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3487942",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Dec 10 11:17:19 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3487942",
  abstract =     "Earthquakes are among the most inevitable natural
                 catastrophes. The uncertainty about the severity of the
                 earthquake has a profound effect on the burden of
                 disaster and causes massive economic and societal
                 losses. Although unpredictable, it can be \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "115",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chang:2022:ISIa,
  author =       "Yuan-Hao Chang and Jalil Boukhobza and Song Han",
  title =        "Introduction to the Special Issue on Memory and Storage
                 Systems for Embedded and {IoT} Applications",
  journal =      j-TECS,
  volume =       "21",
  number =       "1",
  pages =        "1:1--1:4",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3505283",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Feb 16 14:00:33 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3505283",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "1",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Manohar:2022:CUC,
  author =       "Sheel Sindhu Manohar and Sparsh Mittal and Hemangee K.
                 Kapoor",
  title =        "{CORIDOR}: Using {COherence} and {TempoRal LocalIty} to
                 Mitigate Read Disturbance {ErrOR} in {STT--RAM} Caches",
  journal =      j-TECS,
  volume =       "21",
  number =       "1",
  pages =        "2:1--2:24",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3484493",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Feb 16 14:00:33 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3484493",
  abstract =     "In the deep sub-micron region, ``spin-transfer torque
                 RAM'' (STT-RAM) suffers from ``read-disturbance error''
                 (RDE), whereby a read operation disturbs the stored
                 data. Mitigation of RDE requires restore operations,
                 which imposes latency and energy penalties. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "2",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

%%% The second author's compound family name (Gomez Perez) is braced so
%%% that BibTeX parses it as a single surname rather than taking only
%%% the final word.
@Article{Marinelli:2022:MES,
  author =       "Tommaso Marinelli and Jos{\'e} Ignacio {G{\'o}mez
                 P{\'e}rez} and Christian Tenllado and Manu Komalan and
                 Mohit Gupta and Francky Catthoor",
  title =        "Microarchitectural Exploration of {STT--MRAM} Last-level
                 Cache Parameters for Energy-efficient Devices",
  journal =      j-TECS,
  volume =       "21",
  number =       "1",
  pages =        "3:1--3:20",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3490391",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Feb 16 14:00:33 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3490391",
  abstract =     "As the technology scaling advances, limitations of
                 traditional memories in terms of density and energy
                 become more evident. Modern caches occupy a large part
                 of a CPU physical size and high static leakage poses a
                 limit to the overall efficiency of the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "3",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wittig:2022:AES,
  author =       "Robert Wittig and Philipp Schulz and Emil Matus and
                 Gerhard P. Fettweis",
  title =        "Accurate Estimation of Service Rates in Interleaved
                 Scratchpad Memory Systems",
  journal =      j-TECS,
  volume =       "21",
  number =       "1",
  pages =        "4:1--4:15",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3457171",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Feb 16 14:00:33 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3457171",
  abstract =     "The prototyping of embedded platforms demands rapid
                 exploration of multi-dimensional parameter sets.
Especially the design of the memory system is essential to guarantee
                 high utilization while reducing conflicts at the same
                 time. To aid the design process, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "4",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Hakert:2022:SMR,
  author =       "Christian Hakert and Kuan-Hsun Chen and Horst Schirmeier
                 and Lars Bauer and Paul R. Genssler and Georg von der
                 Br{\"u}ggen and Hussam Amrouch and J{\"o}rg Henkel and
                 Jian-Jia Chen",
  title =        "Software-Managed Read and Write Wear-Leveling for
                 Non-Volatile Main Memory",
  journal =      j-TECS,
  volume =       "21",
  number =       "1",
  pages =        "5:1--5:24",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3483839",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Feb 16 14:00:33 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3483839",
  abstract =     "In-memory wear-leveling has become an important research
                 field for emerging non-volatile main memories over the
                 past years. Many approaches in the literature perform
                 wear-leveling by making use of special hardware. Since
                 most non-volatile memories only \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "5",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

%%% The second author's compound family name (Sanchez Verdejo) is braced
%%% so that BibTeX parses it as a single surname rather than taking only
%%% the final word.
@Article{Asifuzzaman:2022:PPE,
  author =       "Kazi Asifuzzaman and Rommel {S{\'a}nchez Verdejo} and
                 Petar Radojkovi{\'c}",
  title =        "Performance and Power Estimation of {STT--MRAM} Main
                 Memory with Reliable System-level Simulation",
  journal =      j-TECS,
  volume =       "21",
  number =       "1",
  pages =        "6:1--6:25",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3476838",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Feb 16 14:00:33 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3476838",
  abstract =     "It is questionable whether DRAM will continue to scale
                 and will meet the needs of next-generation systems.
                 Therefore, significant effort is invested in research
                 and development of novel memory technologies. One of the
                 candidates for next-generation memory \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "6",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Shin:2022:EED,
  author =       "Dongsuk Shin and Hakbeom Jang and Kiseok Oh and Jae W.
                 Lee",
  title =        "An Energy-Efficient {DRAM} Cache Architecture for Mobile
                 Platforms With {PCM}-Based Main Memory",
  journal =      j-TECS,
  volume =       "21",
  number =       "1",
  pages =        "7:1--7:22",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3451995",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Feb 16 14:00:33 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3451995",
  abstract =     "A long battery life is a first-class design objective
                 for mobile devices, and main memory accounts for a major
                 portion of total energy consumption.
Moreover, the energy consumption from memory is expected to increase
                 further with ever-growing demands for \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "7",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wen:2022:SHD,
  author =       "Fei Wen and Mian Qin and Paul Gratz and Narasimha
                 Reddy",
  title =        "Software Hint-Driven Data Management for Hybrid Memory
                 in Mobile Systems",
  journal =      j-TECS,
  volume =       "21",
  number =       "1",
  pages =        "8:1--8:18",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3494536",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Feb 16 14:00:33 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3494536",
  abstract =     "Hybrid memory systems, comprised of emerging
                 non-volatile memory (NVM) and DRAM, have been proposed
                 to address the growing memory demand of current mobile
                 applications. Recently emerging NVM technologies, such
                 as phase-change memories (PCM), memristor, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "8",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zou:2022:DHA,
  author =       "Yu Zou and Amro Awad and Mingjie Lin",
  title =        "{DirectNVM}: Hardware-accelerated {NVMe SSDs} for
                 High-performance Embedded Computing",
  journal =      j-TECS,
  volume =       "21",
  number =       "1",
  pages =        "9:1--9:24",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3463911",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Feb 16 14:00:33 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3463911",
  abstract =     "With data-intensive artificial intelligence (AI) and
                 machine learning (ML) applications rapidly surging,
                 modern high-performance embedded systems, with
                 heterogeneous computing resources, critically demand
                 low-latency and high-bandwidth data communication.
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "9",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Missimer:2022:TRT,
  author =       "Katherine Missimer and Manos Athanassoulis and Richard
                 West",
  title =        "{Telomere}: Real-Time {NAND} Flash Storage",
  journal =      j-TECS,
  volume =       "21",
  number =       "1",
  pages =        "10:1--10:24",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3479157",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Feb 16 14:00:33 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3479157",
  abstract =     "Modern solid-state disks achieve high data transfer
                 rates due to their massive internal parallelism.
                 However, out-of-place updates for flash memory incur
                 garbage collection costs when valid data needs to be
                 copied during space reclamation.
The root cause of this extra cost is that solid-state disks are not
                 always able to accurately determine data lifetime and
                 group together data that expires before the space needs
                 to be reclaimed. Real-time systems found in autonomous
                 vehicles, industrial control systems, and assembly-line
                 robots store data from hundreds of sensors and often
                 have predictable data lifetimes. These systems require
                 guaranteed high storage bandwidth for read and write
                 operations by mission-critical real-time tasks. In this
                 article, we depart from the traditional block device
                 interface to guarantee the high throughput needed to
                 process large volumes of data. Using data lifetime
                 information from the application layer, our proposed
                 real-time design, called Telomere, is able to
                 intelligently lay out data in NAND flash memory and
                 eliminate valid page copies during garbage collection.
                 Telomere's real-time admission control is able to
                 guarantee tasks their required read and write operations
                 within their periods. Under randomly generated tasksets
                 containing 500 tasks, Telomere achieves 30\% higher
                 throughput with a 5\% storage cost compared to
                 pre-existing techniques.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "10",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zou:2022:APS,
  author =       "Yu Zou and Kazi Abu Zubair and Mazen Alwadi and Rakin
                 Muhammad Shadab and Sanjay Gandham and Amro Awad and
                 Mingjie Lin",
  title =        "{ARES}: Persistently Secure Non-Volatile Memory with
                 Processor-transparent and Hardware-friendly Integrity
                 Verification and Metadata Recovery",
  journal =      j-TECS,
  volume =       "21",
  number =       "1",
  pages =        "11:1--11:32",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3492735",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Feb 16 14:00:33 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3492735",
  abstract =     "Emerging byte-addressable Non-Volatile Memory (NVM)
                 technology, although promising superior memory density
                 and ultra-low energy consumption, poses unique
                 challenges to achieving persistent data privacy and
                 computing security, both of which are critically
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "11",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Eldstaal-Ahrens:2022:CCL,
  author =       "Albin Eldst{\aa}l-Ahrens and Angelos Arelakis and
                 Ioannis Sourdis",
  title =        "{L$^2$C}: Combining Lossy and Lossless Compression on
                 Memory and {I/O}",
  journal =      j-TECS,
  volume =       "21",
  number =       "1",
  pages =        "12:1--12:27",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3481641",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Feb 16 14:00:33 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3481641",
  abstract =     "In this article, we introduce L$^2$C, a hybrid
                 lossy/lossless compression scheme applicable both to the
                 memory subsystem and I/O traffic of a processor chip.
                 L$^2$C employs general-purpose lossless compression and
                 combines it with state-of-the-art lossy compression to
                 achieve compression ratios up to 16:1 and to improve the
                 utilization of chip's bandwidth resources. Compressing
                 memory traffic yields lower memory access time,
                 improving system performance, and energy efficiency.
                 Compressing I/O traffic offers several benefits for
                 resource-constrained systems, including more efficient
                 storage and networking. We evaluate L$^2$C as a memory
                 compressor in simulation with a set of
                 approximation-tolerant applications. L$^2$C improves
                 baseline execution time by an average of 50\% and total
                 system energy consumption by 16\%. Compared to the lossy
                 and lossless current state-of-the-art memory compression
                 approaches, L$^2$C improves execution time by 9\% and
                 26\%, respectively, and reduces system energy costs by
                 3\% and 5\%, respectively. I/O compression efficacy is
                 evaluated using a set of real-life datasets.
L$^2$C achieves compression ratios of up to 10.4:1 for a single
                 dataset and on average about 4:1, while introducing no
                 more than 0.4\% error.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "12",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Nie:2022:HRA,
  author =       "Lanshun Nie and Chenghao Fan and Shuang Lin and Li Zhang
                 and Yajuan Li and Jing Li",
  title =        "Holistic Resource Allocation Under Federated Scheduling
                 for Parallel Real-time Tasks",
  journal =      j-TECS,
  volume =       "21",
  number =       "1",
  pages =        "13:1--13:29",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3489467",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Feb 16 14:00:33 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3489467",
  abstract =     "With the technology trend of hardware and workload
                 consolidation for embedded systems and the rapid
                 development of edge computing, there has been increasing
                 interest in supporting parallel real-time tasks to
                 better utilize the multi-core platforms while \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "13",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Minakova:2022:SBR,
  author =       "Svetlana Minakova and Dolly Sapra and Todor Stefanov and
                 Andy D.
Pimentel",
  title =        "Scenario Based Run-Time Switching for Adaptive
                 {CNN}-Based Applications at the Edge",
  journal =      j-TECS,
  volume =       "21",
  number =       "2",
  pages =        "14:1--14:33",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3488718",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Mar 24 15:59:57 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3488718",
  abstract =     "Convolutional Neural Networks (CNNs) are biologically
                 inspired computational models that are at the heart of
                 many modern computer vision and natural language
                 processing applications. Some of the CNN-based
                 applications are executed on mobile and embedded
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "14",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chen:2022:PRA,
  author =       "Xing Chen and Umit Ogras and Chaitali Chakrabarti",
  title =        "Probabilistic Risk-Aware Scheduling with Deadline
                 Constraint for Heterogeneous {SoCs}",
  journal =      j-TECS,
  volume =       "21",
  number =       "2",
  pages =        "15:1--15:27",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3489409",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Mar 24 15:59:57 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3489409",
  abstract =     "Hardware Trojans can compromise System-on-Chip (SoC)
                 performance. Protection schemes implemented to combat
                 these threats cannot guarantee 100\% detection rate and
                 may also introduce performance overhead. This paper
                 defines the risk of running a job on an \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "15",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Dong:2022:EEA,
  author =       "Jiankuo Dong and Fangyu Zheng and Jingqiang Lin and Zhe
                 Liu and Fu Xiao and Guang Fan",
  title =        "{EC-ECC}: Accelerating Elliptic Curve Cryptography for
                 Edge Computing on Embedded {GPU TX2}",
  journal =      j-TECS,
  volume =       "21",
  number =       "2",
  pages =        "16:1--16:25",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3492734",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Mar 24 15:59:57 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3492734",
  abstract =     "Driven by artificial intelligence and computer vision
                 industries, Graphics Processing Units (GPUs) are now
                 rapidly achieving extraordinary computing power. In
                 particular, the NVIDIA Tegra K1/X1/X2 embedded GPU
                 platforms, which are also treated as edge \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "16",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Biswas:2022:PNC,
  author =       "Arnab Kumar Biswas and Biplab Sikdar",
  title =        "Protecting Network-on-Chip Intellectual Property Using
                 Timing Channel Fingerprinting",
  journal =      j-TECS,
  volume =       "21",
  number =       "2",
  pages =        "17:1--17:21",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3495565",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Mar 24 15:59:57 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3495565",
  abstract =     "The theft of Intellectual property (IP) is a serious
                 security threat for all businesses that are involved in
                 the creation of IP.
In this article, we consider such attacks against IP for
                 Network-on-Chip (NoC) that are commonly used as a
                 popular on-chip \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "17",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Liao:2022:RRS,
  author =       "Jianwei Liao and Jun Li and Mingwang Zhao and Zhibing
                 Sha and Zhigang Cai",
  title =        "Read Refresh Scheduling and Data Reallocation against
                 Read Disturb in {SSDs}",
  journal =      j-TECS,
  volume =       "21",
  number =       "2",
  pages =        "18:1--18:27",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3495254",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Mar 24 15:59:57 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3495254",
  abstract =     "Read disturb is a circuit-level noise in flash-based
                 Solid-State Drives (SSDs), induced by intensive read
                 requests, which may result in unexpected read errors.
                 The approach of read refresh (RR) is commonly adopted to
                 mitigate its negative effects by \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "18",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Hong:2022:EGE,
  author =       "Ziyang Hong and C.
Patrick Yue",
  title =        "{Efficient-Grad}: Efficient Training Deep Convolutional
                 Neural Networks on Edge Devices with Gradient
                 Optimizations",
  journal =      j-TECS,
  volume =       "21",
  number =       "2",
  pages =        "19:1--19:24",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3504034",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Mar 24 15:59:57 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3504034",
  abstract =     "With the prospering of mobile devices, the distributed
                 learning approach, enabling model training with
                 decentralized data, has attracted great interest from
                 researchers. However, the lack of training capability
                 for edge devices significantly limits the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "19",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zhao:2022:MSM,
  author =       "Qingling Zhao and Mengfei Qu and Zonghua Gu and Haibo
                 Zeng",
  title =        "Minimizing Stack Memory for Partitioned
                 Mixed-criticality Scheduling on Multiprocessor
                 Platforms",
  journal =      j-TECS,
  volume =       "21",
  number =       "2",
  pages =        "20:1--20:30",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3506703",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Mar 24 15:59:57 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3506703",
  abstract =     "A Mixed-Criticality System (MCS) features the
                 integration of multiple subsystems that are subject to
                 different levels of safety certification on a shared
                 hardware platform. In cost-sensitive application domains
                 such as automotive E/E systems, it is \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "20",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chang:2022:ISIb,
  author =       "Yuan-Hao Chang and Jalil Boukhobza and Song Han",
  title =        "Introduction to the Special Issue on Memory and Storage
                 Systems for Embedded and {IoT} Applications: {Part 2}",
  journal =      j-TECS,
  volume =       "21",
  number =       "3",
  pages =        "21:1--21:2",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3531707",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jul 20 06:57:46 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3531707",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "21",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

%%% The coined system name Store-n-Learn is braced in the title so that
%%% sentence-casing bibliography styles do not downcase it.
@Article{Gupta:2022:SLC,
  author =       "Saransh Gupta and Behnam Khaleghi and Sahand Salamat and
                 Justin Morris and Ranganathan Ramkumar and Jeffrey Yu
                 and Aniket Tiwari and Jaeyoung Kang and Mohsen Imani and
                 Baris Aksanli and Tajana {\v{S}}imuni{\'c} Rosing",
  title =        "{Store-n-Learn}: Classification and Clustering with
                 Hyperdimensional Computing across Flash Hierarchy",
  journal =      j-TECS,
  volume =       "21",
  number =       "3",
  pages =        "22:1--22:25",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3503541",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jul 20 06:57:46 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3503541",
  abstract =     "Processing large amounts of data, especially in learning
                 algorithms, poses a challenge for current embedded
                 computing systems. Hyperdimensional (HD) computing (HDC)
                 is a brain-inspired computing paradigm that works with
                 high-dimensional vectors called \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed.
Comput. Syst.",
  articleno =    "22",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Rrushi:2022:PDP,
  author =       "Julian L. Rrushi",
  title =        "Physics-Driven Page Fault Handling for Customized
                 Deception against {CPS} Malware",
  journal =      j-TECS,
  volume =       "21",
  number =       "3",
  pages =        "23:1--23:36",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3502742",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jul 20 06:57:46 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3502742",
  abstract =     "Malware crafted to attack cyber-physical systems such as
                 the electrical power grid have a physics-centric
                 nucleus. Cyber-physical systems malware understand
                 physics and hence use their knowledge to guide how they
                 initiate physical damage on a compromised \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.", articleno = "23", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lin:2022:DRR, author = "Wei-Ting Lin and Hsiang-Yun Cheng and Chia-Lin Yang and Meng-Yao Lin and Kai Lien and Han-Wen Hu and Hung-Sheng Chang and Hsiang-Pang Li and Meng-Fan Chang and Yen-Ting Tsou and Chin-Fu Nien", title = "{DL-RSIM}: a Reliability and Deployment Strategy Simulation Framework for {ReRAM}-based {CNN} Accelerators", journal = j-TECS, volume = "21", number = "3", pages = "24:1--24:29", month = may, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3507639", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jul 20 06:57:46 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3507639", abstract = "Memristor-based deep learning accelerators provide a promising solution to improve the energy efficiency of neuromorphic computing systems. However, the electrical properties and crossbar structure of memristors make these accelerators error-prone. In \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "24", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wei:2022:SBD, author = "Qian Wei and Bingzhe Li and Wanli Chang and Zhiping Jia and Zhaoyan Shen and Zili Shao", title = "A Survey of Blockchain Data Management Systems", journal = j-TECS, volume = "21", number = "3", pages = "25:1--25:28", month = may, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3502741", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jul 20 06:57:46 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/bitcoin.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3502741", abstract = "Blockchain has been widely deployed in various fields, such as finance, education, and public services. Blockchain has decentralized mechanisms with persistency and auditability and runs as an immutable distributed ledger, where transactions are jointly \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "25", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Bai:2022:FCW, author = "Zhenyu Bai and Hugues Cass{\'e} and Marianne {De Michiel} and Thomas Carle and Christine Rochange", title = "A Framework for Calculating {WCET} Based on Execution Decision Diagrams", journal = j-TECS, volume = "21", number = "3", pages = "26:1--26:26", month = may, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3476879", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jul 20 06:57:46 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3476879", abstract = "Due to the dynamic behaviour of acceleration mechanisms such as caches and branch predictors, static Worst-case Execution Time (WCET) analysis methods tend to scale poorly to modern hardware architectures. As a result, a trade-off must be found between \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "26", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Song:2022:DDB, author = "Shihao Song and Harry Chong and Adarsha Balaji and Anup Das and James Shackleford and Nagarajan Kandasamy", title = "{DFSynthesizer}: Dataflow-based Synthesis of Spiking Neural Networks to Neuromorphic Hardware", journal = j-TECS, volume = "21", number = "3", pages = "27:1--27:35", month = may, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3479156", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jul 20 06:57:46 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3479156", abstract = "Spiking Neural Networks (SNNs) are an emerging computation model that uses event-driven activation and bio-inspired learning algorithms. 
SNN-based machine learning programs are typically executed on tile-based neuromorphic hardware platforms, where each \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "27", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Xiao:2022:CIA, author = "Jun Xiao and Yixian Shen and Andy D. Pimentel", title = "Cache Interference-aware Task Partitioning for Non-preemptive Real-time Multi-core Systems", journal = j-TECS, volume = "21", number = "3", pages = "28:1--28:28", month = may, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3487581", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jul 20 06:57:46 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3487581", abstract = "Shared caches in multi-core processors introduce serious difficulties in providing guarantees on the real-time properties of embedded software due to the interaction and the resulting contention in the shared caches. Prior work has studied the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.",
  articleno =    "28",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ullah:2022:ADA,
  author =       "Salim Ullah and Siva Satyendra Sahoo and Nemath Ahmed and Debabrata Chaudhury and Akash Kumar",
  title =        "{AppAxO}: Designing Application-specific Approximate Operators for {FPGA}-based Embedded Systems",
  journal =      j-TECS,
  volume =       "21",
  number =       "3",
  pages =        "29:1--29:31",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3513262",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jul 20 06:57:46 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3513262",
  abstract =     "Approximate arithmetic operators, such as adders and multipliers, are increasingly used to satisfy the energy and performance requirements of resource-constrained embedded systems. However, most of the available approximate operators have an application-\ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.", articleno = "29", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lin:2022:HEI, author = "Yi-Syuan Lin and Yu-Pei Liang and Tseng-Yi Chen and Yuan-Hao Chang and Shuo-Han Chen and Hsin-Wen Wei and Wei-Kuan Shih", title = "How to Enable Index Scheme for Reducing the Writing Cost of {DNA} Storage on Insertion and Deletion", journal = j-TECS, volume = "21", number = "3", pages = "30:1--30:25", month = may, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3516482", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jul 20 06:57:46 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3516482", abstract = "Recently, the requirement of storing digital data has been growing rapidly; however, the conventional storage medium cannot satisfy these huge demands. Fortunately, thanks to biological technology development, storing digital data into deoxyribonucleic \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "30", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Raj:2022:RMV, author = "Pani Prithvi Raj and Pakala Akhil Reddy and Nitin Chandrachoodan", title = "Reduced Memory {Viterbi} Decoding for Hardware-accelerated Speech Recognition", journal = j-TECS, volume = "21", number = "3", pages = "31:1--31:18", month = may, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3510028", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jul 20 06:57:46 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3510028", abstract = "Large Vocabulary Continuous Speech Recognition systems require Viterbi searching through a large state space to find the most probable sequence of phonemes that led to a given sound sample. This needs storing and updating of a large Active State List (ASL). \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "31", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Desai:2022:CLR, author = "Harsh Desai and Matteo Nardello and Davide Brunelli and Brandon Lucia", title = "{Camaroptera}: a Long-range Image Sensor with Local Inference for Remote Sensing Applications", journal = j-TECS, volume = "21", number = "3", pages = "32:1--32:25", month = may, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3510850", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jul 20 06:57:46 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3510850", abstract = "Batteryless image sensors present an opportunity for long-life, long-range sensor deployments that require zero maintenance, and have low cost. 
Such deployments are critical for enabling remote sensing applications, e.g., instrumenting national highways, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "32", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mao:2022:TEA, author = "Jiachen Mao and Qing Yang and Ang Li and Kent W. Nixon and Hai Li and Yiran Chen", title = "Toward Efficient and Adaptive Design of Video Detection System with Deep Neural Networks", journal = j-TECS, volume = "21", number = "3", pages = "33:1--33:21", month = may, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3484946", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jul 20 06:57:46 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3484946", abstract = "In the past decade, Deep Neural Networks (DNNs), e.g., Convolutional Neural Networks, achieved human-level performance in vision tasks such as object classification and detection. However, DNNs are known to be computationally expensive and thus hard to be \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "33", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chen:2022:SRT, author = "Cong Chen and Zhong Hong and Jian-Min Jiang", title = "Scheduling in Real-Time Mobile Systems", journal = j-TECS, volume = "21", number = "3", pages = "34:1--34:36", month = may, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3517747", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jul 20 06:57:46 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3517747", abstract = "To guarantee the safety and security of a real-time mobile system such as an intelligent transportation system, it is necessary to model and analyze its behaviors prior to actual development. In particular, the mobile objects in such systems must be \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "34", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Trajkovic:2022:PMA, author = "Jelena Trajkovic and Sara Karimi and Samantha Hangsan and Wenlu Zhang", title = "Prediction Modeling for Application-Specific Communication Architecture Design of Optical {NoC}", journal = j-TECS, volume = "21", number = "4", pages = "35:1--35:??", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3520241", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Oct 29 08:11:12 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3520241", abstract = "Multi-core systems-on-chip are becoming state-of-the-art. Therefore, there is a need for a fast and energy-efficient interconnect to take full advantage of the computational capabilities. 
Integration of silicon photonics with a traditional electrical \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "35", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Krishnan:2022:BCS, author = "Archanaa S. Krishnan and Patrick Schaumont", title = "Benchmarking and Configuring Security Levels in Intermittent Computing", journal = j-TECS, volume = "21", number = "4", pages = "36:1--36:??", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3522748", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Oct 29 08:11:12 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3522748", abstract = "Intermittent computing derives its name from the intermittent character of the power source used to drive the computing, typically an energy harvester of ambient energy sources. Intermittent computing is characterized by frequent transitions between the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "36", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Huang:2022:HFY, author = "Shihua Huang and Luc Waeijen and Henk Corporaal", title = "How Flexible is Your Computing System?", journal = j-TECS, volume = "21", number = "4", pages = "37:1--37:??", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3524861", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Oct 29 08:11:12 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3524861", abstract = "In literature, computer architectures are frequently claimed to be highly flexible, typically implying the existence of trade-offs between flexibility and performance or energy efficiency. Processor flexibility, however, is not very sharply defined, and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "37", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Isuwa:2022:QMQ, author = "Samuel Isuwa and Somdip Dey and Andre P. Ortega and Amit Kumar Singh and Bashir M. Al-Hashimi and Geoff V. Merrett", title = "{QUAREM}: Maximising {QoE} Through Adaptive Resource Management in Mobile {MPSoC} Platforms", journal = j-TECS, volume = "21", number = "4", pages = "38:1--38:??", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3526116", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Oct 29 08:11:12 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3526116", abstract = "Heterogeneous multi-processor system-on-chip (MPSoC) smartphones are required to offer increasing performance and user quality-of-experience (QoE), despite comparatively slow advances in battery technology. 
Approaches to balance instantaneous power \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "38", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chen:2022:ARF, author = "Yanfeng Chen and Tianyu Zhang and Fanxin Kong and Lin Zhang and Qingxu Deng", title = "Attack-resilient Fusion of Sensor Data with Uncertain Delays", journal = j-TECS, volume = "21", number = "4", pages = "39:1--39:??", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3532181", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Oct 29 08:11:12 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3532181", abstract = "Malicious attackers may disrupt the safety of autonomous systems through compromising sensors to feed wrong measurements to the controller. This article proposes attack-resilient sensor fusion that combines local sensor readings and shared sensing \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "39", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{France-Pillois:2022:SAR, author = "Maxime France-Pillois and Abdoulaye Gamati{\'e} and Gilles Sassatelli", title = "A Segmented Adaptive Router for Near Energy-Proportional Networks-on-Chip", journal = j-TECS, volume = "21", number = "4", pages = "40:1--40:??", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3529106", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Oct 29 08:11:12 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3529106", abstract = "A Network-on-Chip (NoC) is an essential component of a chip multiprocessor (CMP) which however contributes to a large fraction of system energy. The unpredictability of traffic across a NoC frequently involves an expensive over-sizing of NoC resources \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "40", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mishra:2022:SCF, author = "Tanmaya Mishra and Thidapat Chantem and Ryan Gerdes", title = "Survey of Control-flow Integrity Techniques for Real-time Embedded Systems", journal = j-TECS, volume = "21", number = "4", pages = "41:1--41:??", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3538275", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Oct 29 08:11:12 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3538275", abstract = "Computing systems, including real-time embedded systems, are becoming increasingly connected to allow for more advanced and safer operation. 
Such embedded systems are also often resource-constrained, for example, with lower processing capabilities \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "41", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wang:2022:RID, author = "Tse-Yuan Wang and Chun-Feng Wu and Che-Wei Tsao and Yuan-Hao Chang and Tei-Wei Kuo and Xue Liu", title = "Rethinking the Interactivity of {OS} and Device Layers in Memory Management", journal = j-TECS, volume = "21", number = "4", pages = "42:1--42:??", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3530876", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Oct 29 08:11:12 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3530876", abstract = "In the big data era, a huge number of services has placed a fast-growing demand on the capacity of DRAM-based main memory. However, due to the high hardware cost and serious leakage power/energy consumption, the growth rate of DRAM capacity cannot meet \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "42", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ouyang:2022:WWF, author = "Xiangzhen Ouyang and Yian Zhu", title = "\pkg{wfspan}: Wait-free Dynamic Memory Management", journal = j-TECS, volume = "21", number = "4", pages = "43:1--43:??", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3533724", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Oct 29 08:11:12 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3533724", abstract = "Dynamic memory allocation plays a vital role in modern application programs. 
Modern lock-free memory allocators based on hardware atomic primitives usually provide good performance. However, threads may starve in these lock-free implementations, leading \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "43", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Choi:2022:ECA, author = "Kyubaik Choi and Gerald E. Sobelman", title = "An Efficient {CNN} Accelerator for Low-Cost Edge Systems", journal = j-TECS, volume = "21", number = "4", pages = "44:1--44:??", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3539224", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Oct 29 08:11:12 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3539224", abstract = "Customized hardware based convolutional neural network (CNN or ConvNet) accelerators have attracted significant attention for applications in a low-cost, edge computing system. However, there is a lack of research that seeks to optimize at both the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.",
  articleno =    "44",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zhao:2022:CBI,
  author =       "Qingling Zhao and Mingqiang Chen and Zonghua Gu and Siyu Luan and Haibo Zeng and Samarjit Chakraborty",
  title =        "{CAN} Bus Intrusion Detection Based on Auxiliary Classifier {GAN} and Out-of-distribution Detection",
  journal =      j-TECS,
  volume =       "21",
  number =       "4",
  pages =        "45:1--45:??",
  month =        jul,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3540198",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Oct 29 08:11:12 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3540198",
  abstract =     "The Controller Area Network (CAN) is a ubiquitous bus protocol present in the Electrical/Electronic (E/E) systems of almost all vehicles. It is vulnerable to a range of attacks once the attacker gains access to the bus through the vehicle's attack \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.", articleno = "45", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Daghero:2022:HAR, author = "Francesco Daghero and Alessio Burrello and Chen Xie and Marco Castellano and Luca Gandolfi and Andrea Calimera and Enrico Macii and Massimo Poncino and Daniele Jahier Pagliari", title = "Human Activity Recognition on Microcontrollers with Quantized and Adaptive Deep Neural Networks", journal = j-TECS, volume = "21", number = "4", pages = "46:1--46:??", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3542819", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Oct 29 08:11:12 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3542819", abstract = "Human Activity Recognition (HAR) based on inertial data is an increasingly diffused task on embedded devices, from smartphones to ultra low-power sensors. Due to the high computational complexity of deep learning models, most embedded HAR systems are \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "46", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Shafique:2022:ISIa, author = "Muhammad Shafique and Theocharis Theocharides and Hai Li and Chun Jason Xue", title = "Introduction to the Special Issue on Accelerating {AI} on the Edge --- {Part 1}", journal = j-TECS, volume = "21", number = "5", pages = "47:1--47:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3558078", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:21 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3558078", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "47", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mendez:2022:EIC, author = "Javier Mendez and Kay Bierzynski and M. P. Cu{\'e}llar and Diego P. Morales", title = "Edge Intelligence: Concepts, Architectures, Applications, and Future Directions", journal = j-TECS, volume = "21", number = "5", pages = "48:1--48:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3486674", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:21 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3486674", abstract = "The name edge intelligence, also known as Edge AI, is a recent term used in the past few years to refer to the confluence of machine learning, or broadly speaking artificial intelligence, with edge computing. In this article, we revise the concepts \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "48", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kang:2022:MLM, author = "Chih-Kai Kang and Hashan Roshantha Mendis and Chun-Han Lin and Ming-Syan Chen and Pi-Cheng Hsiu", title = "More Is Less: Model Augmentation for Intermittent Deep Inference", journal = j-TECS, volume = "21", number = "5", pages = "49:1--49:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3506732", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:21 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3506732", abstract = "Energy harvesting creates an emerging intermittent computing paradigm but poses new challenges for sophisticated applications such as intermittent deep neural network (DNN) inference. 
Although model compression has adapted DNNs to resource-constrained \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "49", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhu:2022:TUO, author = "Shien Zhu and Luan H. K. Duong and Weichen Liu", title = "{TAB}: Unified and Optimized Ternary, Binary, and Mixed-precision Neural Network Inference on the Edge", journal = j-TECS, volume = "21", number = "5", pages = "50:1--50:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3508390", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:21 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3508390", abstract = "Ternary Neural Networks (TNNs) and mixed-precision Ternary Binary Networks (TBNs) have demonstrated higher accuracy compared to Binary Neural Networks (BNNs) while providing fast, low-power, and memory-efficient inference. Related works have improved the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "50", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Jeong:2022:TBF, author = "Eunjin Jeong and Jangryul Kim and Soonhoi Ha", title = "{TensorRT}-Based Framework and Optimization Methodology for Deep Learning Inference on {Jetson} Boards", journal = j-TECS, volume = "21", number = "5", pages = "51:1--51:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3508391", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:21 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3508391", abstract = "As deep learning inference applications are increasing in embedded devices, an embedded device tends to equip neural processing units (NPUs) in addition to a multi-core CPU and a GPU. NVIDIA Jetson AGX Xavier is an example. For fast and efficient \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "51", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kundu:2022:TAA, author = "Souvik Kundu and Yao Fu and Bill Ye and Peter A. Beerel and Massoud Pedram", title = "Toward Adversary-aware Non-iterative Model Pruning through Dynamic Network Rewiring of {DNNs}", journal = j-TECS, volume = "21", number = "5", pages = "52:1--52:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3510833", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:21 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3510833", abstract = "We present a dynamic network rewiring (DNR) method to generate pruned deep neural network (DNN) models that both are robust against adversarially generated images and maintain high accuracy on clean images. 
In particular, the disclosed DNR training method \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "52", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Paissan:2022:PSB, author = "Francesco Paissan and Alberto Ancilotto and Elisabetta Farella", title = "{PhiNets}: a Scalable Backbone for Low-power {AI} at the Edge", journal = j-TECS, volume = "21", number = "5", pages = "53:1--53:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3510832", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:21 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3510832", abstract = "In the Internet of Things era, where we see many interconnected and heterogeneous mobile and fixed smart devices, distributing the intelligence from the cloud to the edge has become a necessity. Due to limited computational and communication capabilities, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "53", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Gomez:2022:DDP, author = "Andres Gomez and Andreas Tretter and Pascal Alexander Hager and Praveenth Sanmugarajah and Luca Benini and Lothar Thiele", title = "Dataflow Driven Partitioning of Machine Learning Applications for Optimal Energy Use in Batteryless Systems", journal = j-TECS, volume = "21", number = "5", pages = "54:1--54:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3520135", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:21 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3520135", abstract = "Sensing systems powered by energy harvesting have traditionally been designed to tolerate long periods without energy. As the Internet of Things (IoT) evolves toward a more transient and opportunistic execution paradigm, reducing energy storage costs will \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "54", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kutukcu:2022:CGA, author = "Basar Kutukcu and Sabur Baidya and Anand Raghunathan and Sujit Dey", title = "Contention Grading and Adaptive Model Selection for Machine Vision in Embedded Systems", journal = j-TECS, volume = "21", number = "5", pages = "55:1--55:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3520134", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:21 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3520134", abstract = "Real-time machine vision applications running on resource-constrained embedded systems face challenges for maintaining performance. 
An especially challenging scenario arises when multiple applications execute at the same time, creating contention for the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "55", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Jokic:2022:CKE, author = "Petar Jokic and Erfan Azarkhish and Andrea Bonetti and Marc Pons and Stephane Emery and Luca Benini", title = "A Construction Kit for Efficient Low Power Neural Network Accelerator Designs", journal = j-TECS, volume = "21", number = "5", pages = "56:1--56:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3520127", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:21 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3520127", abstract = "Implementing embedded neural network processing at the edge requires efficient hardware acceleration that combines high computational throughput with low power consumption. Driven by the rapid evolution of network architectures and their algorithmic \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "56", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Resch:2022:EER, author = "Salonik Resch and S. Karen Khatamifard and Zamshed I. Chowdhury and Masoud Zabihi and Zhengyang Zhao and Husrev Cilasun and Jian-Ping Wang and Sachin S. Sapatnekar and Ulya R. 
Karpuzcu", title = "Energy-efficient and Reliable Inference in Nonvolatile Memory under Extreme Operating Conditions", journal = j-TECS, volume = "21", number = "5", pages = "57:1--57:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3520130", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:21 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3520130", abstract = "Beyond-edge devices can operate outside the reach of the power grid and without batteries. Such devices can be deployed in large numbers in regions that are difficult to access. Using machine learning, these devices can solve complex problems and relay \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "57", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Herzog:2022:RDE, author = "Benedict Herzog and Stefan Reif and Judith Hemp and Timo H{\"o}nig and Wolfgang Schr{\"o}der-Preikschat", title = "Resource-demand Estimation for Edge Tensor Processing Units", journal = j-TECS, volume = "21", number = "5", pages = "58:1--58:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3520132", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:21 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3520132", abstract = "Machine learning has shown tremendous success in a large variety of applications. The evolution of machine-learning applications from cloud-based systems to mobile and embedded devices has shifted the focus from only quality-related aspects towards the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "58", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hemmat:2022:CCA, author = "Maedeh Hemmat and Joshua {San Miguel} and Azadeh Davoodi", title = "{CAP'NN}: a Class-aware Framework for Personalized Neural Network Inference", journal = j-TECS, volume = "21", number = "5", pages = "59:1--59:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3520126", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:21 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3520126", abstract = "We propose a framework for Class-aware Personalized Neural Network Inference (CAP'NN), which prunes an already-trained neural network model based on the preferences of individual users. Specifically, by adapting to the subset of output classes that each \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "59", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Park:2022:QST, author = "Jun-Hyung Park and Kang-Min Kim and Sangkeun Lee", title = "Quantized Sparse Training: a Unified Trainable Framework for Joint Pruning and Quantization in {DNNs}", journal = j-TECS, volume = "21", number = "5", pages = "60:1--60:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3524066", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:21 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3524066", abstract = "Deep neural networks typically have extensive parameters and computational operations. Pruning and quantization techniques have been widely used to reduce the complexity of deep models. 
Both techniques can be jointly used for realizing significantly \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "60", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Baharani:2022:ARE, author = "Mohammadreza Baharani and Hamed Tabkhi", title = "{ATCN}: Resource-efficient Processing of Time Series on Edge", journal = j-TECS, volume = "21", number = "5", pages = "61:1--61:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3524070", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:21 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3524070", abstract = "This article presents a scalable deep learning model called Agile Temporal Convolutional Network (ATCN) for highly accurate fast classification and time series prediction in resource-constrained embedded systems. ATCN is a family of compact networks with \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "61", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Goyal:2022:HFU, author = "Vidushi Goyal and Reetuparna Das and Valeria Bertacco", title = "Hardware-friendly User-specific Machine Learning for Edge Devices", journal = j-TECS, volume = "21", number = "5", pages = "62:1--62:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3524125", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:21 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3524125", abstract = "Machine learning (ML) on resource-constrained edge devices is expensive and often requires offloading computation to the cloud, which may compromise the privacy of user data. In contrast, the type of data processed at edge devices is user-specific and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "62", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{George:2022:UPE, author = "Biji George and Om Ji Omer and Ziaul Choudhury and {Anoop V} and Sreenivas Subramoney", title = "A Unified Programmable Edge Matrix Processor for Deep Neural Networks and Matrix Algebra", journal = j-TECS, volume = "21", number = "5", pages = "63:1--63:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3524453", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:21 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3524453", abstract = "Matrix Algebra and Deep Neural Networks represent foundational classes of computational algorithms across multiple emerging applications like Augmented Reality or Virtual Reality, autonomous navigation (cars, drones, robots), data science, and various \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "63", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Bouzidi:2022:PMC, author = "Halima Bouzidi and Hamza Ouarnoughi and Smail Niar and Abdessamad {Ait El Cadi}", title = "Performance Modeling of Computer Vision-based {CNN} on Edge {GPUs}", journal = j-TECS, volume = "21", number = "5", pages = "64:1--64:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3527169", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:21 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3527169", abstract = "Convolutional Neural Networks (CNNs) are currently widely used in various fields, particularly for computer vision applications. 
Edge platforms have drawn tremendous attention from academia and industry due to their ability to improve execution time and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "64", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Yuan:2022:MFC, author = "Geng Yuan and Peiyan Dong and Mengshu Sun and Wei Niu and Zhengang Li and Yuxuan Cai and Yanyu Li and Jun Liu and Weiwen Jiang and Xue Lin and Bin Ren and Xulong Tang and Yanzhi Wang", title = "Mobile or {FPGA}? {A} Comprehensive Evaluation on Energy Efficiency and a Unified Optimization Framework", journal = j-TECS, volume = "21", number = "5", pages = "65:1--65:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3528578", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:21 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3528578", abstract = "Efficient deployment of Deep Neural Networks (DNNs) on edge devices (i.e., FPGAs and mobile platforms) is very challenging, especially under a recent witness of the increasing DNN model size and complexity. Model compression strategies, including weight \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "65", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ghasemi:2022:EEE, author = "Mehdi Ghasemi and Daler Rakhmatov and Carole-Jean Wu and Sarma Vrudhula", title = "{EdgeWise}: Energy-efficient {CNN} Computation on Edge Devices under Stochastic Communication Delays", journal = j-TECS, volume = "21", number = "5", pages = "66:1--66:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3530908", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:21 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3530908", abstract = "This article presents a framework to enable the energy-efficient execution of convolutional neural networks (CNNs) on edge devices. The framework consists of a pair of edge devices connected via a wireless network: a performance and energy-constrained \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "66", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Shafique:2022:ISIb, author = "Muhammad Shafique and Theocharis Theocharides and Hai (Helen) Li and Chun Jason Xue", title = "Introduction to the Special Issue on Accelerating {AI} on the Edge --- {Part 2}", journal = j-TECS, volume = "21", number = "6", pages = "67:1--67:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3563127", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:23 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3563127", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "67", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chen:2022:ERD, author = "Kuan-Hsun Chen and Chiahui Su and Christian Hakert and Sebastian Buschj{\"a}ger and Chao-Lin Lee and Jenq-Kuen Lee and Katharina Morik and Jian-Jia Chen", title = "Efficient Realization of Decision Trees for Real-Time Inference", journal = j-TECS, volume = "21", number = "6", pages = "68:1--68:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3508019", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:23 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3508019", abstract = "For timing-sensitive edge applications, the demand for efficient lightweight machine learning solutions has increased recently. Tree ensembles are among the state-of-the-art in many machine learning applications. While single decision trees are comparably \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "68", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Pan:2022:BWH, author = "Hongyi Pan and Diaa Badawi and Ahmet Enis Cetin", title = "Block {Walsh-Hadamard} Transform-based Binary Layers in Deep Neural Networks", journal = j-TECS, volume = "21", number = "6", pages = "69:1--69:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3510026", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:23 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3510026", abstract = "Convolution has been the core operation of modern deep neural networks. It is well known that convolutions can be implemented in the Fourier Transform domain. 
In this article, we propose to use binary block Walsh-Hadamard transform (WHT) instead of the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "69", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mukherjee:2022:AFD, author = "Arijit Mukherjee and Jayeeta Mondal and Swarnava Dey", title = "Accelerated Fire Detection and Localization at Edge", journal = j-TECS, volume = "21", number = "6", pages = "70:1--70:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3510027", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:23 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3510027", abstract = "Fire-related incidents continue to be reported as a leading cause of life and property destruction. Automated fire detection and localization (AFDL) systems have grown in importance with the evolution of applied robotics, especially because use of robots \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "70", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Almeida:2022:DDO, author = "Mario Almeida and Stefanos Laskaridis and Stylianos I. Venieris and Ilias Leontiadis and Nicholas D. 
Lane", title = "{DynO}: Dynamic Onloading of Deep Neural Networks from Cloud to Device", journal = j-TECS, volume = "21", number = "6", pages = "71:1--71:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3510831", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:23 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3510831", abstract = "Recently, there has been an explosive growth of mobile and embedded applications using convolutional neural networks (CNNs). To alleviate their excessive computational demands, developers have traditionally resorted to cloud offloading, inducing high \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "71", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ganesan:2022:DST, author = "Vinod Ganesan and Pratyush Kumar", title = "Design and Scaffolded Training of an Efficient {DNN} Operator for Computer Vision on the Edge", journal = j-TECS, volume = "21", number = "6", pages = "72:1--72:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3511212", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:23 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3511212", abstract = "Massively parallel systolic arrays and resource-efficient depthwise separable convolutions are two promising hardware and software techniques to accelerate DNN inference on the edge. Interestingly, their combination is inefficient: Computational patterns \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "72", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Shahhosseini:2022:OLO, author = "Sina Shahhosseini and Dongjoo Seo and Anil Kanduri and Tianyi Hu and Sung-Soo Lim and Bryan Donyanavard and Amir M. Rahmani and Nikil Dutt", title = "Online Learning for Orchestration of Inference in Multi-user End-edge-cloud Networks", journal = j-TECS, volume = "21", number = "6", pages = "73:1--73:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3520129", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:23 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3520129", abstract = "Deep-learning-based intelligent services have become prevalent in cyber-physical applications, including smart cities and health-care. Deploying deep-learning-based intelligence near the end-user enhances privacy protection, responsiveness, and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "73", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Tsouvalas:2022:FST, author = "Vasileios Tsouvalas and Aaqib Saeed and Tanir Ozcelebi", title = "Federated Self-training for Semi-supervised Audio Recognition", journal = j-TECS, volume = "21", number = "6", pages = "74:1--74:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3520128", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:23 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3520128", abstract = "Federated Learning is a distributed machine learning paradigm dealing with decentralized and personal datasets. 
Since data reside on devices such as smartphones and virtual assistants, labeling is entrusted to the clients or labels are extracted in an \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "74", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lemaire:2022:SAH, author = "Edgar Lemaire and Beno{\^\i}t Miramond and S{\'e}bastien Bilavarn and Hadi Saoud and Nassim Abderrahmane", title = "Synaptic Activity and Hardware Footprint of Spiking Neural Networks in Digital Neuromorphic Systems", journal = j-TECS, volume = "21", number = "6", pages = "75:1--75:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3520133", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:23 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3520133", abstract = "Spiking neural networks are expected to bring high resources, power, and energy efficiency to machine learning hardware implementations. In this regard, they could facilitate the integration of Artificial Intelligence in highly constrained embedded \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "75", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Yang:2022:DDC, author = "Yi Yang and Murugan Sankaradas and Srimat Chakradhar", title = "{DyCo}: Dynamic, Contextualized {AI} Models", journal = j-TECS, volume = "21", number = "6", pages = "76:1--76:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3520131", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:23 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3520131", abstract = "Devices with limited computing resources use smaller AI models to achieve low-latency inferencing. However, model accuracy is typically much lower than the accuracy of a bigger model that is trained and deployed in places where the computing resources are \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "76", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Song:2022:DTC, author = "Shihao Song and Adarsha Balaji and Anup Das and Nagarajan Kandasamy", title = "Design-Technology Co-Optimization for {NVM-Based} Neuromorphic Processing Elements", journal = j-TECS, volume = "21", number = "6", pages = "77:1--77:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3524068", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:23 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3524068", abstract = "An emerging use case of machine learning (ML) is to train a model on a high-performance system and deploy the trained model on energy-constrained embedded systems. 
Neuromorphic hardware platforms, which operate on principles of the biological brain, can \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "77", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Morris:2022:HUH, author = "Justin Morris and Kazim Ergun and Behnam Khaleghi and Mohsen Imani and Baris Aksanli and Tajana Simunic", title = "{HyDREA}: Utilizing Hyperdimensional Computing for a More Robust and Efficient Machine Learning System", journal = j-TECS, volume = "21", number = "6", pages = "78:1--78:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3524067", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:23 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3524067", abstract = "Today's systems rely on sending all the data to the cloud and then using complex algorithms, such as Deep Neural Networks, which require billions of parameters and many hours to train a model. In contrast, the human brain can do much of this learning \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "78", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Khan:2022:BIC, author = "Asif Ali Khan and S{\'e}bastien Ollivier and Stephen Longofono and Gerald Hempel and Jeronimo Castrillon and Alex K.
Jones", title = "Brain-inspired Cognition in Next-generation Racetrack Memories", journal = j-TECS, volume = "21", number = "6", pages = "79:1--79:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3524071", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:23 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3524071", abstract = "Hyperdimensional computing (HDC) is an emerging computational framework inspired by the brain that operates on vectors with thousands of dimensions to emulate cognition. Unlike conventional computational frameworks that operate on numbers, HDC, like the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "79", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Alam:2022:WCD, author = "Syed Asad Alam and Andrew Anderson and Barbara Barabasz and David Gregg", title = "{Winograd} Convolution for Deep Neural Networks: Efficient Point Selection", journal = j-TECS, volume = "21", number = "6", pages = "80:1--80:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3524069", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:23 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3524069", abstract = "Convolutional neural networks (CNNs) have dramatically improved the accuracy of image, video, and audio processing for tasks such as object recognition, image segmentation, and interactive speech systems. CNNs require large amounts of computing resources \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "80", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hassantabar:2022:MMH, author = "Shayan Hassantabar and Joe Zhang and Hongxu Yin and Niraj K. Jha", title = "{MHDeep}: Mental Health Disorder Detection System Based on Wearable Sensors and Artificial Neural Networks", journal = j-TECS, volume = "21", number = "6", pages = "81:1--81:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3527170", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:23 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3527170", abstract = "Mental health problems impact the quality of life of millions of people around the world. However, diagnosis of mental health disorders is a challenging problem that often relies on self-reporting by patients about their behavioral patterns and social \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "81", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{HeydariGorji:2022:LCS, author = "Ali HeydariGorji and Siavash Rezaei and Mahdi Torabzadehkashi and Hossein Bobarshad and Vladimir Alves and Pai H. 
Chou", title = "Leveraging Computational Storage for Power-Efficient Distributed Data Analytics", journal = j-TECS, volume = "21", number = "6", pages = "82:1--82:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3528577", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:23 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3528577", abstract = "This article presents a family of computational storage drives (CSDs) and demonstrates their performance and power improvements due to in-storage processing (ISP) when running big data analytics applications. CSDs are an emerging class of solid state \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "82", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Li:2022:FDN, author = "Shuwei Li and Changhai Man and Ao Shen and Ziyi Guan and Wei Mao and Shaobo Luo and Rumin Zhang and Hao Yu", title = "A Fall Detection Network by {$2$D\slash $3$D} Spatio-temporal Joint Models with Tensor Compression on Edge", journal = j-TECS, volume = "21", number = "6", pages = "83:1--83:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3531004", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:23 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3531004", abstract = "Falling is ranked highly among the threats in elderly healthcare, which promotes the development of automatic fall detection systems with extensive concern. With the fast development of the Internet of Things (IoT) and Artificial Intelligence (AI), camera \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "83", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Soliman:2022:FFF, author = "Taha Soliman and Nellie Laleni and Tobias Kirchner and Franz M{\"u}ller and Ashish Shrivastava and Thomas K{\"a}mpfe and Andre Guntoro and Norbert Wehn", title = "{FELIX}: a Ferroelectric {FET} Based Low Power Mixed-Signal In-Memory Architecture for {DNN} Acceleration", journal = j-TECS, volume = "21", number = "6", pages = "84:1--84:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3529760", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:23 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3529760", abstract = "Today, a large number of applications depend on deep neural networks (DNN) to process data and perform complicated tasks at restricted power and latency specifications. Therefore, processing-in-memory (PIM) platforms are actively explored as a promising \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "84", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Leite:2022:REC, author = "Clayton Frederick Souza Leite and Yu Xiao", title = "Resource-Efficient Continual Learning for Sensor-Based Human Activity Recognition", journal = j-TECS, volume = "21", number = "6", pages = "85:1--85:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3530910", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:23 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3530910", abstract = "Recent advances in deep learning have granted unrivaled performance to sensor-based human activity recognition (HAR). 
However, in a real-world scenario, the HAR solution is subject to diverse changes over time such as the need to learn new activity \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "85", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Pal:2022:OEI, author = "Subhankar Pal and Swagath Venkataramani and Viji Srinivasan and Kailash Gopalakrishnan", title = "{OnSRAM}: Efficient Inter-Node On-Chip Scratchpad Management in Deep Learning Accelerators", journal = j-TECS, volume = "21", number = "6", pages = "86:1--86:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3530909", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:23 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3530909", abstract = "Hardware acceleration of Artificial Intelligence (AI) workloads has gained widespread popularity with its potential to deliver unprecedented performance and efficiency. An important challenge remains in how AI accelerators are programmed to sustain high \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "86", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Cai:2023:OOF, author = "Xuyi Cai and Ying Wang and Lei Zhang", title = "{Optimus}: an Operator Fusion Framework for Deep Neural Networks", journal = j-TECS, volume = "22", number = "1", pages = "1:1--1:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3520142", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:25 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3520142", abstract = "The reduction of neural parameters and operations for the applications on embedded and IoT platforms in current deep neural network (DNN) architectures has received increasing attention. Relatively, the intermediate feature maps of such lightweight neural \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "1", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Oh:2023:MFL, author = "Deok-Jae Oh and Yaebin Moon and Do Kyu Ham and Tae Jun Ham and Yongjun Park and Jae W. Lee and Jung Ho Ahn and Eojin Lee", title = "{MaPHeA}: a Framework for Lightweight Memory Hierarchy-aware Profile-guided Heap Allocation", journal = j-TECS, volume = "22", number = "1", pages = "2:1--2:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3527853", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:25 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3527853", abstract = "Hardware performance monitoring units (PMUs) are a standard feature in modern microprocessors, providing a rich set of microarchitectural event samplers. 
Recently, numerous profile-guided optimization (PGO) frameworks have exploited them to feature much \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "2", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Monniaux:2023:FVL, author = "David Monniaux and Cyril Six", title = "Formally Verified Loop-Invariant Code Motion and Assorted Optimizations", journal = j-TECS, volume = "22", number = "1", pages = "3:1--3:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3529507", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:25 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3529507", abstract = "We present an approach for implementing a formally certified loop-invariant code motion optimization by composing an unrolling pass and a formally certified yet efficient global subexpression elimination. This approach is lightweight: each pass comes with \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "3", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wen:2023:WCP, author = "Elliott Wen and Gerald Weber and Suranga Nanayakkara", title = "{WasmAndroid}: a Cross-Platform Runtime for Native Programming Languages on {Android}", journal = j-TECS, volume = "22", number = "1", pages = "4:1--4:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3530286", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:25 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3530286", abstract = "Open source hardware such as RISC-V has been gaining substantial momentum. 
Recently, they have begun to embrace Google's Android operating system to leverage its software ecosystem. Despite the encouraging progress, a challenging issue arises: a majority \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "4", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chen:2023:FNN, author = "Weiwei Chen and Ying Wang and Ying Xu and Chengsi Gao and Cheng Liu and Lei Zhang", title = "A Framework for Neural Network Architecture and Compile Co-optimization", journal = j-TECS, volume = "22", number = "1", pages = "5:1--5:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3533251", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:25 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3533251", abstract = "The efficiency of deep neural network (DNN) solutions on real hardware devices are mainly decided by the DNN architecture and the compiler-level scheduling strategy on the hardware. When we try to fully exploit the underlying hardware and obtain the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "5", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Young:2023:CAD, author = "May Young and Alan J. Hu and Guy G. F. 
Lemieux", title = "Cache Abstraction for Data Race Detection in Heterogeneous Systems with Non-coherent Accelerators", journal = j-TECS, volume = "22", number = "1", pages = "6:1--6:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3535457", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:25 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3535457", abstract = "Embedded systems are becoming increasingly complex and heterogeneous, featuring multiple processor cores (which might themselves be heterogeneous) as well as specialized hardware accelerators, all accessing shared memory. Many accelerators are non-\ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "6", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Brilli:2023:ECM, author = "Gianluca Brilli and Roberto Cavicchioli and Marco Solieri and Paolo Valente and Andrea Marongiu", title = "Evaluating Controlled Memory Request Injection for Efficient Bandwidth Utilization and Predictable Execution in Heterogeneous {SoCs}", journal = j-TECS, volume = "22", number = "1", pages = "7:1--7:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3548773", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:25 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3548773", abstract = "High-performance embedded platforms are increasingly adopting heterogeneous systems-on-chip (HeSoC) that couple multi-core CPUs with accelerators such as GPU, FPGA, or AI engines. Adopting HeSoCs in the context of real-time workloads is not immediately \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "7", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Das:2023:EFS, author = "Satyajit Das and Kevin Martin and Thomas Peyret and Philippe Coussy", title = "An Efficient and Flexible Stochastic {CGRA} Mapping Approach", journal = j-TECS, volume = "22", number = "1", pages = "8:1--8:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3550071", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:25 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3550071", abstract = "Coarse-Grained Reconfigurable Array (CGRA) architectures are promising high-performance and power-efficient platforms. However, mapping applications efficiently on CGRA is a challenging task. This is known to be an NP complete problem. Hence, finding good \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "8", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Saberi:2023:POT, author = "Iman Saberi and Fathiyeh Faghih and Farzad Sobhi Bavil", title = "A Passive Online Technique for Learning Hybrid Automata from {Input\slash} Output Traces", journal = j-TECS, volume = "22", number = "1", pages = "9:1--9:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3556543", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:25 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3556543", abstract = "Specification synthesis is the process of deriving a model from the input-output traces of a system. It is used extensively in test design, reverse engineering, and system identification. One type of the resulting artifact of this process for cyber-\ldots{}",
acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "9", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Cleaveland:2023:FVN, author = "Rachel Cleaveland and Stefan Mitsch and Andr{\'e} Platzer", title = "Formally Verified Next-generation Airborne Collision Avoidance Games in {ACAS X}", journal = j-TECS, volume = "22", number = "1", pages = "10:1--10:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3544970", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:25 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3544970", abstract = "The design of aircraft collision avoidance algorithms is a subtle but important challenge that merits the need for provable safety guarantees. Obtaining such guarantees is nontrivial given the unpredictability of the interplay of the intruder aircraft \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "10", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mondal:2023:HTD, author = "Anindan Mondal and Shubrojyoti Karmakar and Mahabub Hasan Mahalat and Suchismita Roy and Bibhash Sen and Anupam Chattopadhyay", title = "Hardware {Trojan} Detection using Transition Probability with Minimal Test Vectors", journal = j-TECS, volume = "22", number = "1", pages = "11:1--11:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3545000", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:25 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3545000", abstract = "Hardware Trojans (HTs) are malicious manipulations of the standard functionality of an integrated circuit (IC). Sophisticated defense against HT attacks has become the utmost current research endeavor. In particular, the HTs whose operations depend on the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "11", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Fradet:2023:RRD, author = "Pascal Fradet and Alain Girault and Ruby Krishnaswamy and Xavier Nicollin and Arash Shafiei", title = "{RDF}: a Reconfigurable Dataflow Model of Computation", journal = j-TECS, volume = "22", number = "1", pages = "12:1--12:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3544972", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:25 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3544972", abstract = "Dataflow Models of Computation (MoCs) are widely used in embedded systems, including multimedia processing, digital signal processing, telecommunications, and automatic control. In a dataflow MoC, an application is specified as a graph of actors connected \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "12", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Rodionova:2023:TRT, author = "Alena Rodionova and Lars Lindemann and Manfred Morari and George Pappas", title = "Temporal Robustness of Temporal Logic Specifications: Analysis and Control Design", journal = j-TECS, volume = "22", number = "1", pages = "13:1--13:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3550072", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:25 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3550072", abstract = "We study the temporal robustness of temporal logic specifications and show how to design temporally robust control laws for time-critical control systems. 
This topic is of particular interest in connected systems and interleaving processes such as multi-\ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "13", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Xu:2023:LVL, author = "Zirui Xu and Fuxun Yu and Chenchen Liu and Xiang Chen", title = "{LanCeX}: a Versatile and Lightweight Defense Method against Condensed Adversarial Attacks in Image and Audio Recognition", journal = j-TECS, volume = "22", number = "1", pages = "14:1--14:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3555375", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:25 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3555375", abstract = "Convolutional Neural Networks (CNNs) are widely deployed in various embedded recognition applications. However, they demonstrate a considerable vulnerability to adversarial attacks, which leverage the well-designed perturbations to mislead the recognition \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "14", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Huang:2023:DEL, author = "Wenbo Huang and Lei Zhang and Shuoyuan Wang and Hao Wu and Aiguo Song", title = "Deep Ensemble Learning for Human Activity Recognition Using Wearable Sensors via Filter Activation", journal = j-TECS, volume = "22", number = "1", pages = "15:1--15:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3551486", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:25 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3551486", abstract = "During the past decade, human activity recognition (HAR) using wearable sensors has become a new research hot spot due to its extensive use in various application domains such as healthcare, fitness, smart homes, and eldercare. Deep neural networks, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "15", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hessien:2023:PPS, author = "Salah Hessien and Mohamed Hassan", title = "{PISCOT}: a Pipelined Split-Transaction {COTS-Coherent} Bus for Multi-Core Real-Time Systems", journal = j-TECS, volume = "22", number = "1", pages = "16:1--16:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3556975", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:25 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3556975", abstract = "Tasks in modern embedded systems such as automotive and avionics communicate among each other using shared data towards achieving the desired functionality of the whole system.
In commodity platforms, cores communicate data through the shared memory \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "16", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Yeh:2023:WRR, author = "Po-Chen Yeh and Chin-Hsien Wu and Yung-Hsiang Lin and Ming-Yan Wu", title = "A Write-Related and Read-Related {DRAM} Allocation Strategy Inside Solid-State Drives {(SSDs)}", journal = j-TECS, volume = "22", number = "1", pages = "17:1--17:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3561301", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:25 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3561301", abstract = "Although NAND flash memory has the advantages of small size, low-power consumption, shock resistance, and fast access speed, NAND flash memory still faces the problems of ``out-of-place updates,'' ``garbage collection,'' and ``unbalanced execution time'' due to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "17", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ali:2023:ESE, author = "Ali J. {Ben Ali} and Marziye Kouroshli and Sofiya Semenova and Zakieh Sadat Hashemifar and Steven Y. 
Ko and Karthik Dantu", title = "{Edge-SLAM}: Edge-Assisted Visual Simultaneous Localization and Mapping", journal = j-TECS, volume = "22", number = "1", pages = "18:1--18:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3561972", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:25 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3561972", abstract = "Localization in urban environments is becoming increasingly important and used in tools such as ARCore [18], ARKit [34] and others. One popular mechanism to achieve accurate indoor localization and a map of the space is using Visual Simultaneous \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "18", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Koh:2023:PST, author = "Jaime Koh and Bruno Bodin", title = "{$K$}-Periodic Scheduling for Throughput-Buffering Trade-Off Exploration of {CSDF}", journal = j-TECS, volume = "22", number = "1", pages = "19:1--19:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3559760", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:25 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3559760", abstract = "The design of time-critical embedded systems often requires static models of computation such as cyclo-static dataflow. These models enable performance guarantees, execution correctness, and optimized memory usage. Nonetheless, determining optimal buffer \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "19", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ernst:2023:ACN, author = "Rolf Ernst and Dominik St{\"o}hrmann and Alex Bendrick and Adam Kostrzewa", title = "Application-centric Network Management --- Addressing Safety and Real-time in {V2X} Applications", journal = j-TECS, volume = "22", number = "2", pages = "20:1--20:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3528411", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:26 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3528411", abstract = "The current roadmaps and surveys for future wireless networking typically focus on communication and networking technologies and use representative applications to derive future network requirements. Such a benchmarking approach, however, does not cover \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "20", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Pujol:2023:VEC, author = "Roger Pujol and Josep Jorba and Hamid Tabani and Leonidas Kosmidis and Enrico Mezzetti and Jaume Abella and Francisco Cazorla", title = "Vector Extensions in {COTS} Processors to Increase Guaranteed Performance in Real-Time Systems", journal = j-TECS, volume = "22", number = "2", pages = "21:1--21:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3561054", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:26 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3561054", abstract = "The need for increased application performance in high-integrity systems such as those in avionics is on the rise as software continues to implement more complex functionalities. The prevalent computing solution for future high-integrity embedded products \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "21", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Sarwar:2023:CPE, author = "Mir Sarwar and Rajarshi Ray and Ansuman Banerjee", title = "A Contrastive Plan Explanation Framework for Hybrid System Models", journal = j-TECS, volume = "22", number = "2", pages = "22:1--22:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3561532", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:26 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3561532", abstract = "In artificial intelligence planning, having an explanation of a plan given by a planner is often desirable. 
The ability to explain various aspects of a synthesized plan to an end user not only brings in trust on the planner but also reveals insights of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "22", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Erata:2023:EEA, author = "Ferhat Erata and Eren Yildiz and Arda Goknil and Kasim Sinan Yildirim and Jakub Szefer and Ruzica Piskac and Gokcin Sezgin", title = "{ETAP}: Energy-aware Timing Analysis of Intermittent Programs", journal = j-TECS, volume = "22", number = "2", pages = "23:1--23:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3563216", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:26 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3563216", abstract = "Energy harvesting battery-free embedded devices rely only on ambient energy harvesting that enables stand-alone and sustainable IoT applications. These devices execute programs when the harvested ambient energy in their energy reservoir is sufficient to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "23", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Gkeka:2023:RSC, author = "Maria Rafaela Gkeka and Alexandros Patras and Nikolaos Tavoularis and Stylianos Piperakis and Emmanouil Hourdakis and Panos Trahanias and Christos D. 
Antonopoulos and Spyros Lalis and Nikolaos Bellas", title = "Reconfigurable System-on-Chip Architectures for Robust Visual {SLAM} on Humanoid Robots", journal = j-TECS, volume = "22", number = "2", pages = "24:1--24:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3570210", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:26 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3570210", abstract = "Visual Simultaneous Localization and Mapping (vSLAM) is the method of employing an optical sensor to map the robot's observable surroundings while also identifying the robot's pose in relation to that map. The accuracy and speed of vSLAM calculations can \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "24", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hu:2023:HPI, author = "Xinyi Hu and Debiao He and Min Luo and Cong Peng and Qi Feng and Xinyi Huang", title = "High-Performance Implementation of the Identity-Based Signature Scheme in {IEEE P1363} on {GPU}", journal = j-TECS, volume = "22", number = "2", pages = "25:1--25:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3564784", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:26 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3564784", abstract = "Identity-based cryptography is proposed to solve the complicated certificate management of traditional public-key cryptography. The pairing computation and high-level tower extension field arithmetic turn out to be the performance bottleneck of pairing-\ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Embed. Comput. Syst.", articleno = "25", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kundu:2023:SRB, author = "Atanu Kundu and Sarthak Das and Rajarshi Ray", title = "{SAT-Reach}: a Bounded Model Checker for Affine Hybrid Systems", journal = j-TECS, volume = "22", number = "2", pages = "26:1--26:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3567425", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:26 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3567425", abstract = "Bounded model checking (BMC) is well-known to be undecidable even for simple hybrid systems. Existing work targeted for a wide class of non-linear hybrid systems reduces the BMC problem to the satisfiability problem of an satisfiability modulo theory \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "26", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ogras:2023:ISI, author = "Umit Y. Ogras and Radu Marculescu and Trevor N. Mudge and Michael Kishinevsky", title = "Introduction to the Special Issue on Domain-Specific System-on-Chip Architectures and Run-Time Management Techniques", journal = j-TECS, volume = "22", number = "2", pages = "27:1--27:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3567834", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:26 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3567834", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "27", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Krishnakumar:2023:DSA, author = "Anish Krishnakumar and Umit Ogras and Radu Marculescu and Mike Kishinevsky and Trevor Mudge", title = "Domain-Specific Architectures: Research Problems and Promising Approaches", journal = j-TECS, volume = "22", number = "2", pages = "28:1--28:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3563946", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:26 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3563946", abstract = "Process technology-driven performance and energy efficiency improvements have slowed down as we approach physical design limits. General-purpose manycore architectures attempt to circumvent this challenge, but they have a significant performance and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "28", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Li:2023:EDS, author = "Yueting Li and Wang Kang and Kunyu Zhou and Keni Qiu and Weisheng Zhao", title = "Experimental Demonstration of {STT-MRAM}-based Nonvolatile Instantly On\slash Off System for {IoT} Applications: Case Studies", journal = j-TECS, volume = "22", number = "2", pages = "29:1--29:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3546193", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:26 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3546193", abstract = "Energy consumption has been a big challenge for electronic devices, particularly for battery-powered Internet of Things (IoT) equipment. 
To address such a challenge, on the one hand, low-power electronic design methodologies and novel power management \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "29", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Aminabadi:2023:SAE, author = "Reza Yazdani Aminabadi and Olatunji Ruwase and Minjia Zhang and Yuxiong He and Jose-Maria Arnau and Antonio Gonz{\'a}lez", title = "{SHARP}: an Adaptable, Energy-Efficient Accelerator for Recurrent Neural Networks", journal = j-TECS, volume = "22", number = "2", pages = "30:1--30:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3552513", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:26 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3552513", abstract = "The effectiveness of Recurrent Neural Networks (RNNs) for tasks such as Automatic Speech Recognition has fostered interest in RNN inference acceleration. Due to the recurrent nature and data dependencies of RNN computations, prior work has designed \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "30", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Boroujerdian:2023:FES, author = "Behzad Boroujerdian and Ying Jing and Devashree Tripathy and Amit Kumar and Lavanya Subramanian and Luke Yen and Vincent Lee and Vivek Venkatesan and Amit Jindal and Robert Shearer and Vijay Janapa Reddi", title = "{FARSI}: an Early-stage Design Space Exploration Framework to Tame the Domain-specific System-on-chip Complexity", journal = j-TECS, volume = "22", number = "2", pages = "31:1--31:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3544016", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:26 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3544016", abstract = "Domain-specific SoCs (DSSoCs) are an attractive solution for domains with extremely stringent power, performance, and area constraints. However, DSSoCs suffer from two fundamental complexities. On the one hand, their many specialized hardware blocks \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "31", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Brumar:2023:EDA, author = "Iulian Brumar and Georgios Zacharopoulos and Yuan Yao and Saketh Rama and David Brooks and Gu-Yeon Wei", title = "Early {DSE} and Automatic Generation of Coarse-grained Merged Accelerators", journal = j-TECS, volume = "22", number = "2", pages = "32:1--32:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3546070", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:26 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3546070", abstract = "Post-Moore's law area-constrained systems rely on accelerators to deliver performance enhancements. Coarse-grained accelerators can offer substantial domain acceleration, but manual, ad hoc identification of code to accelerate is prohibitively expensive. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "32", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Inci:2023:QFQ, author = "Ahmet Inci and Siri Virupaksha and Aman Jain and Ting-Wu Chin and Venkata Thallam and Ruizhou Ding and Diana Marculescu", title = "{QUIDAM}: a Framework for Quantization-aware {DNN} Accelerator and Model Co-Exploration", journal = j-TECS, volume = "22", number = "2", pages = "33:1--33:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3555807", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:26 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3555807", abstract = "As the machine learning and systems communities strive to achieve higher energy efficiency through custom deep neural network (DNN) accelerators, varied precision or quantization levels, and model compression techniques, there is a need for design space \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "33", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ahangari:2023:HBH, author = "Hamzeh Ahangari and Muhammet Mustafa {\"O}zdal and {\"O}zcan {\"O}zt{\"u}rk", title = "{HLS}-based High-throughput and Work-efficient Synthesizable Graph Processing Template Pipeline", journal = j-TECS, volume = "22", number = "2", pages = "34:1--34:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3529256", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:26 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3529256", abstract = "Hardware systems composed of diverse execution resources are being deployed to cope with the complexity and performance requirements of Artificial Intelligence (AI) and Machine Learning (ML) applications. With the emergence of new hardware platforms, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "34", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Koul:2023:AAA, author = "Kalhan Koul and Jackson Melchert and Kavya Sreedhar and Leonard Truong and Gedeon Nyengele and Keyi Zhang and Qiaoyi Liu and Jeff Setter and Po-Han Chen and Yuchen Mei and Maxwell Strange and Ross Daly and Caleb Donovick and Alex Carsello and Taeyoung Kong and Kathleen Feng and Dillon Huff and Ankita Nayak and Rajsekhar Setaluri and James Thomas and Nikhil Bhagdikar and David Durst and Zachary Myers and Nestan Tsiskaridze and Stephen Richardson and Rick Bahr and Kayvon Fatahalian and Pat Hanrahan and Clark Barrett and Mark Horowitz and Christopher Torng and Fredrik Kjolstad and Priyanka Raina", title = "{AHA}: an Agile Approach to the Design of Coarse-Grained Reconfigurable Accelerators and Compilers", journal = j-TECS, volume = "22", number = "2", pages = "35:1--35:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3534933", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:26 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3534933", abstract = "With the slowing of Moore's law, computer architects have turned to domain-specific hardware specialization to continue improving the performance and efficiency of computing systems. However, specialization typically entails significant modifications to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "35", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mack:2023:CCI, author = "Joshua Mack and Sahil Hassan and Nirmal Kumbhare and Miguel Castro Gonzalez and Ali Akoglu", title = "{CEDR}: a Compiler-integrated, Extensible {DSSoC} Runtime", journal = j-TECS, volume = "22", number = "2", pages = "36:1--36:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3529257", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:26 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3529257", abstract = "In this work, we present a Compiler-integrated, Extensible Domain Specific System on Chip Runtime (CEDR) ecosystem to facilitate research toward addressing the challenges of architecture, system software, and application development with distinct plug-and-\ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "36", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chen:2023:ARL, author = "Huili Chen and Xinqiao Zhang and Ke Huang and Farinaz Koushanfar", title = "{AdaTest}: Reinforcement Learning and Adaptive Sampling for On-chip Hardware {Trojan} Detection", journal = j-TECS, volume = "22", number = "2", pages = "37:1--37:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3544015", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:26 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3544015", abstract = "This paper proposes AdaTest, a novel adaptive test pattern generation framework for efficient and reliable Hardware Trojan (HT) detection. 
HT is a backdoor attack that tampers with the design of victim integrated circuits (ICs). AdaTest improves the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "37", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Anderson:2023:VPM, author = "Jeff Anderson and Engin Kayraklioglu and Hamid Reza Imani and Chen Shen and Mario Miscuglio and Volker J. Sorger and Tarek El-Ghazawi", title = "Virtualizing a Post-{Moore}'s Law Analog Mesh Processor: The Case of a Photonic {PDE} Accelerator", journal = j-TECS, volume = "22", number = "2", pages = "38:1--38:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3544971", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:26 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3544971", abstract = "Innovative processor architectures aim to play a critical role in future sustainment of performance improvements under severe limitations imposed by the end of Moore's Law. The Reconfigurable Optical Computer (ROC) is one such innovative, Post-Moore's Law \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "38", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{S:2023:PQA, author = "Aswathy N. S. 
and Arnab Sarkar and Hemangee Kapoor", title = "A Predictable {QoS}-aware Memory Request Scheduler for Soft Real-time Systems", journal = j-TECS, volume = "22", number = "2", pages = "39:1--39:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3561052", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Mar 11 08:39:26 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3561052", abstract = "A memory controller manages the flow of data to and from attached memory devices. The order in which a set of contending memory requests from different tasks are serviced significantly influences the rate of progress and completion times of these tasks. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "39", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Sifakis:2023:TAS, author = "Joseph Sifakis and David Harel", title = "Trustworthy Autonomous System Development", journal = j-TECS, volume = "22", number = "3", pages = "40:1--40:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3545178", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 3 08:20:15 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3545178", abstract = "Autonomous systems emerge from the need to progressively replace human operators by autonomous agents in a wide variety of application areas. We offer an analysis of the state of the art in developing autonomous systems, focusing on design and validation \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "40", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Shrivastava:2023:ATS, author = "Aviral Shrivastava and Jian-Jia Chen and Akash Kumar and Anup Das", title = "{ACM TECS} Special Issue on Embedded System Security Tutorials", journal = j-TECS, volume = "22", number = "3", pages = "41:1--41:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3594872", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 3 08:20:15 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3594872", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "41", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chen:2023:TTR, author = "Huili Chen and Farinaz Koushanfar", title = "Tutorial: Toward Robust Deep Learning against Poisoning Attacks", journal = j-TECS, volume = "22", number = "3", pages = "42:1--42:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3574159", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 3 08:20:15 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3574159", abstract = "Deep Learning (DL) has been increasingly deployed in various real-world applications due to its unprecedented performance and automated capability of learning hidden representations. While DL can achieve high task performance, the training process of a DL \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "42", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Feldtkeller:2023:COS, author = "Jakob Feldtkeller and Pascal Sasdrich and Tim G{\"u}neysu", title = "Challenges and Opportunities of Security-Aware {EDA}", journal = j-TECS, volume = "22", number = "3", pages = "43:1--43:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3576199", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 3 08:20:15 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3576199", abstract = "The foundation of every digital system is based on hardware in which security, as a core service of many applications, should be deeply embedded. Unfortunately, the knowledge of system security and efficient hardware design is spread over different \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "43", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Rautakoura:2023:DSH, author = "Antti Rautakoura and Timo H{\"a}m{\"a}l{\"a}inen", title = "Does {SoC} Hardware Development Become Agile by Saying So: a Literature Review and Mapping Study", journal = j-TECS, volume = "22", number = "3", pages = "44:1--44:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3578554", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 3 08:20:15 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3578554", abstract = "The success of agile development methods in software development has raised interest in System-on-Chip (SoC) design, which involves high architectural and development process complexity under time and project management pressure. 
This article discovers \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "44", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Pearce:2023:HLA, author = "Hammond Pearce and Ramesh Karri and Benjamin Tan", title = "High-Level Approaches to Hardware Security: a Tutorial", journal = j-TECS, volume = "22", number = "3", pages = "45:1--45:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3577200", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 3 08:20:15 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3577200", abstract = "Designers use third-party intellectual property (IP) cores and outsource various steps in the integrated circuit (IC) design and manufacturing flow. As a result, security vulnerabilities have been rising. This is forcing IC designers and end users to re-\ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "45", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Gubbi:2023:HTD, author = "Kevin Immanuel Gubbi and Banafsheh Saber Latibari and Anirudh Srikanth and Tyler Sheaves and Sayed Arash Beheshti-Shirazi and Sai Manoj PD and Satareh Rafatirad and Avesta Sasan and Houman Homayoun and Soheil Salehi", title = "Hardware {Trojan} Detection Using Machine Learning: a Tutorial", journal = j-TECS, volume = "22", number = "3", pages = "46:1--46:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3579823", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 3 08:20:15 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3579823", abstract = "With the growth and globalization of IC design and development, there is an increase in the number of Designers and Design houses. As setting up a fabrication facility may easily cost upwards of \$20 billion, costs for advanced nodes may be even greater. IC design houses that cannot produce their chips in-house have no option but to use external foundries that are often in other countries. Establishing trust with these external foundries can be a challenge, and these foundries are assumed to be untrusted. The use of these untrusted foundries in the global semiconductor supply chain has raised concerns about the security of the fabricated ICs targeted for sensitive applications. One of these security threats is the adversarial infestation of fabricated ICs with a Hardware Trojan (HT). An HT can be broadly described as a malicious modification to a circuit to control, modify, disable, or monitor its logic. Conventional VLSI manufacturing tests and verification methods fail to detect HT due to the different and unmodeled nature of these malicious modifications. 
Current state-of-the-art HT detection methods utilize statistical analysis of various side-channel information collected from ICs, such as power analysis, power supply transient analysis, regional supply current analysis, temperature analysis, wireless transmission power analysis, and delay analysis. To detect HTs, most methods require a Trojan-free reference golden IC. A signature from these golden ICs is extracted and used to detect ICs with HTs. However, access to a golden IC is not always feasible. Thus, a mechanism for HT detection is sought that does not require the golden IC. Machine Learning (ML) approaches have emerged to be extremely useful in helping eliminate the need for a golden IC. Recent works on utilizing ML for HT detection have been shown to be promising in achieving this goal. Thus, in this tutorial, we will explain utilizing ML as a solution to the challenge of HT detection. Additionally, we will describe the Electronic Design Automation (EDA) tool flow for automating ML-assisted HT detection. Moreover, to further discuss the benefits of ML-assisted HT detection solutions, we will demonstrate a Neural Network (NN)-assisted timing profiling method for HT detection. Finally, we will discuss the shortcomings and open challenges of ML-assisted HT detection methods.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "46", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Liang:2023:TRS, author = "Tailin Liang and Lei Wang and Shaobo Shi and John Glossner and Xiaotong Zhang", title = "{TCX}: a {RISC} Style Tensor Computing Extension and a Programmable Tensor Processor", journal = j-TECS, volume = "22", number = "3", pages = "47:1--47:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3568310", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 3 08:20:15 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3568310", abstract = "Neural network processors and accelerators are domain-specific architectures deployed to solve the high computational requirements of deep learning algorithms. This article proposes a new instruction set extension for tensor computing, TCX, using Reduced \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "47", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Dong:2023:RAS, author = "Yi Dong and Wei Huang and Vibhav Bharti and Victoria Cox and Alec Banks and Sen Wang and Xingyu Zhao and Sven Schewe and Xiaowei Huang", title = "Reliability Assessment and Safety Arguments for Machine Learning Components in System Assurance", journal = j-TECS, volume = "22", number = "3", pages = "48:1--48:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3570918", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 3 08:20:15 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3570918", abstract = "The increasing use of Machine Learning (ML) components embedded in autonomous systems---so-called Learning-Enabled Systems (LESs)---has resulted in the pressing need to assure their functional safety. As for traditional functional safety, the emerging \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "48", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Yan:2023:MBD, author = "Zujia Yan and Yi Zhuang and Weining Zheng and Jingjing Gu", title = "Multi-bit Data Flow Error Detection Method Based on {SDC} Vulnerability Analysis", journal = j-TECS, volume = "22", number = "3", pages = "49:1--49:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3572838", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 3 08:20:15 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3572838", abstract = "One of the most difficult data flow errors to detect caused by single-event upsets in space radiation is the Silent Data Corruption (SDC). 
To solve the problem of multi-bit upsets causing program SDC, an instruction multi-bit SDC vulnerability prediction \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "49", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kohler:2023:RCE, author = "Leonie K{\"o}hler and Phil Hertha and Matthias Beckert and Alex Bendrick and Rolf Ernst", title = "Robust Cause-Effect Chains with Bounded Execution Time and System-Level Logical Execution Time", journal = j-TECS, volume = "22", number = "3", pages = "50:1--50:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3573388", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 3 08:20:15 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3573388", abstract = "In automotive and industrial real-time software systems, the primary timing constraints relate to cause-effect chains. A cause-effect chain is a sequence of linked tasks and it typically implements the process of reading sensor data, computing algorithms, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "50", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Tuli:2023:CNA, author = "Shikhar Tuli and Chia-Hao Li and Ritvik Sharma and Niraj K. 
Jha", title = "{CODEBench}: a Neural Architecture and Hardware Accelerator Co-Design Framework", journal = j-TECS, volume = "22", number = "3", pages = "51:1--51:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3575798", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 3 08:20:15 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3575798", abstract = "Recently, automated co-design of machine learning (ML) models and accelerator architectures has attracted significant attention from both the industry and academia. However, most co-design frameworks either explore a limited search space or employ \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "51", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Yi:2023:EEE, author = "Saehanseul Yi and Tae-Wook Kim and Jong-Chan Kim and Nikil Dutt", title = "{EASYR}: Energy-Efficient Adaptive System Reconfiguration for Dynamic Deadlines in Autonomous Driving on Multicore Processors", journal = j-TECS, volume = "22", number = "3", pages = "52:1--52:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3570503", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 3 08:20:15 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3570503", abstract = "The increasing computing demands of autonomous driving applications have driven the adoption of multicore processors in real-time systems, which in turn renders energy optimizations critical for reducing battery capacity and vehicle weight. A typical \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "52", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zacharopoulos:2023:TEH, author = "Georgios Zacharopoulos and Adel Ejjeh and Ying Jing and En-Yu Yang and Tianyu Jia and Iulian Brumar and Jeremy Intan and Muhammad Huzaifa and Sarita Adve and Vikram Adve and Gu-Yeon Wei and David Brooks", title = "{Trireme}: Exploration of Hierarchical Multi-level Parallelism for Hardware Acceleration", journal = j-TECS, volume = "22", number = "3", pages = "53:1--53:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3580394", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 3 08:20:15 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3580394", abstract = "The design of heterogeneous systems that include domain specific accelerators is a challenging and time-consuming process. While taking into account area constraints, designers must decide which parts of an application to accelerate in hardware and which \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "53", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lindemann:2023:RSS, author = "Lars Lindemann and Lejun Jiang and Nikolai Matni and George J. 
Pappas", title = "Risk of Stochastic Systems for Temporal Logic Specifications", journal = j-TECS, volume = "22", number = "3", pages = "54:1--54:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3580490", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 3 08:20:15 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3580490", abstract = "The wide availability of data coupled with the computational advances in artificial intelligence and machine learning promise to enable many future technologies such as autonomous driving. While there has been a variety of successful demonstrations of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "54", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Yin:2023:CBR, author = "Jun Yin and Marian Verhelst", title = "{CNN}-based Robust Sound Source Localization with {SRP-PHAT} for the Extreme Edge", journal = j-TECS, volume = "22", number = "3", pages = "55:1--55:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3586996", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 3 08:20:15 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3586996", abstract = "Robust sound source localization for environments with noise and reverberation are increasingly exploiting deep neural networks fed with various acoustic features. Yet, state-of-the-art research mainly focuses on optimizing algorithmic accuracy, resulting \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "55", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Tabanelli:2023:DAY, author = "Enrico Tabanelli and Giuseppe Tagliavini and Luca Benini", title = "{DNN} Is Not All You Need: Parallelizing Non-neural {ML} Algorithms on Ultra-low-power {IoT} Processors", journal = j-TECS, volume = "22", number = "3", pages = "56:1--56:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3571133", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 3 08:20:15 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3571133", abstract = "Machine Learning (ML) functions are becoming ubiquitous in latency- and privacy-sensitive IoT applications, prompting a shift toward near-sensor processing at the extreme edge and the consequent increasing adoption of Parallel Ultra-low-power (PULP) IoT \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "56", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wu:2023:EAD, author = "Yirui Wu and Lilai Zhang and Zonghua Gu and Hu Lu and Shaohua Wan", title = "Edge-{AI}-Driven Framework with Efficient Mobile Network Design for Facial Expression Recognition", journal = j-TECS, volume = "22", number = "3", pages = "57:1--57:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3587038", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 3 08:20:15 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3587038", abstract = "Facial Expression Recognition (FER) in the wild poses significant challenges due to realistic occlusions, illumination, scale, and head pose variations of the facial images. 
In this article, we propose an Edge-AI-driven framework for FER. On the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "57", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Isik:2023:NNC, author = "Berivan Isik and Kristy Choi and Xin Zheng and Tsachy Weissman and Stefano Ermon and H.-S. Philip Wong and Armin Alaghi", title = "Neural Network Compression for Noisy Storage Devices", journal = j-TECS, volume = "22", number = "3", pages = "58:1--58:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3588436", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 3 08:20:15 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3588436", abstract = "Compression and efficient storage of neural network (NN) parameters is critical for applications that run on resource-constrained devices. Despite the significant progress in NN model compression, there has been considerably less investigation in the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "58", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kloda:2023:LLS, author = "Tomasz Kloda and Giovani Gracioli and Rohan Tabish and Reza Mirosanlou and Renato Mancuso and Rodolfo Pellizzoni and Marco Caccamo", title = "Lazy Load Scheduling for Mixed-criticality Applications in Heterogeneous {MPSoCs}", journal = j-TECS, volume = "22", number = "3", pages = "59:1--59:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3587694", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Jul 3 08:20:15 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3587694", abstract = "Newly emerging multiprocessor system-on-a-chip (MPSoC) platforms provide hard processing cores with programmable logic (PL) for high-performance computing applications. In this article, we take a deep look into these commercially available heterogeneous \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "59", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Parra:2023:TMV, author = "Pablo Parra and Antonio {Da Silva} and Borja Losa and J. Ignacio Garc{\'\i}a and {\'O}scar R. 
Polo and Agust{\'\i}n Mart{\'\i}nez and Sebasti{\'a}n S{\'a}nchez", title = "Tailor-made Virtualization Monitor Design for {CPU} Virtualization on {LEON} Processors", journal = j-TECS, volume = "22", number = "4", pages = "60:1--60:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3584702", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 10 07:21:24 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3584702", abstract = "In recent decades, mixed-criticality systems have been widely adopted to reduce the complexity and development times of real-time critical applications. In these systems, applications run on a separation kernel hypervisor, a software element that controls \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "60", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Papaioannou:2023:ULP, author = "Alexios Papaioannou and Charalampos S. Kouzinopoulos and Dimosthenis Ioannidis and Dimitrios Tzovaras", title = "An Ultra-low-power Embedded {AI} Fire Detection and Crowd Counting System for Indoor Areas", journal = j-TECS, volume = "22", number = "4", pages = "61:1--61:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3582433", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 10 07:21:24 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3582433", abstract = "Fire incidents in residential and industrial areas are often the cause of human casualties and property damage. Although there are existing systems that detect fire and monitor the presence of people in indoor areas, research on their implementation in \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "61", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Bhattacharjee:2023:XEA, author = "Abhiroop Bhattacharjee and Abhishek Moitra and Priyadarshini Panda", title = "{XploreNAS}: Explore Adversarially Robust and Hardware-efficient Neural Architectures for Non-ideal Xbars", journal = j-TECS, volume = "22", number = "4", pages = "62:1--62:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3593045", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 10 07:21:24 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3593045", abstract = "Compute In-Memory platforms such as memristive crossbars are gaining focus as they facilitate acceleration of Deep Neural Networks (DNNs) with high area and compute efficiencies. However, the intrinsic non-idealities associated with the analog nature of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "62", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Gunzel:2023:CTA, author = "Mario G{\"u}nzel and Kuan-Hsun Chen and Niklas Ueter and Georg von der Br{\"u}ggen and Marco D{\"u}rr and Jian-Jia Chen", title = "Compositional Timing Analysis of Asynchronized Distributed Cause-effect Chains", journal = j-TECS, volume = "22", number = "4", pages = "63:1--63:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3587036", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 10 07:21:24 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3587036", abstract = "Real-time systems require the formal guarantee of timing constraints, not only for the individual tasks but also for the end-to-end latency of data flows. The data flow among multiple tasks, e.g., from sensors to actuators, is described by a cause-effect \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "63", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Shadab:2023:HHC, author = "Rakin Muhammad Shadab and Yu Zou and Sanjay Gandham and Amro Awad and Mingjie Lin", title = "{HMT}: a Hardware-centric Hybrid Bonsai {Merkle} Tree Algorithm for High-performance Authentication", journal = j-TECS, volume = "22", number = "4", pages = "64:1--64:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3595179", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 10 07:21:24 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3595179", abstract = "The Bonsai Merkle tree (BMT) is a widely used tree structure for authentication of metadata such as encryption counters in a secure computing system. Common BMT algorithms were designed for traditional Von Neumann architectures with a software-centric \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "64", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Min:2023:MTK, author = "Donghyun Min and Kihyun Kim and Chaewon Moon and Awais Khan and Seungjin Lee and Changhwan Yun and Woosuk Chung and Youngjae Kim", title = "A Multi-tenant Key-value {SSD} with Secondary Index for Search Query Processing and Analysis", journal = j-TECS, volume = "22", number = "4", pages = "65:1--65:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3590153", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 10 07:21:24 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3590153", abstract = "Key-value SSDs (KVSSDs) introduced so far are limited in their use as an alternative to the key-value store running on the host due to the following technical limitations. First, they were designed only for a single tenant, limiting the use of multiple \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "65", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhang:2023:OCS, author = "Lin Zhang and Zifan Wang and Fanxin Kong", title = "Optimal Checkpointing Strategy for Real-time Systems with Both Logical and Timing Correctness", journal = j-TECS, volume = "22", number = "4", pages = "66:1--66:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3603172", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 10 07:21:24 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3603172", abstract = "Real-time systems are susceptible to adversarial factors such as faults and attacks, leading to severe consequences. 
This paper presents an optimal checkpoint scheme to bolster fault resilience in real-time systems, addressing both logical consistency and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "66", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{West:2023:RTU, author = "Richard West and Ahmad Golchin and Anton Njavro", title = "Real-Time {USB} Networking and Device {I/O}", journal = j-TECS, volume = "22", number = "4", pages = "67:1--67:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3604429", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 10 07:21:24 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3604429", abstract = "Multicore PC-class embedded systems present an opportunity to consolidate separate microcontrollers as software-defined functions. For instance, an automotive system with more than 100 electronic control units (ECUs) could be replaced with one or, at most, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "67", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kohl:2023:MBD, author = "Maximilian A.
K{\"o}hl and Holger Hermanns", title = "Model-Based Diagnosis of Real-Time Systems: Robustness Against Varying Latency, Clock Drift, and Out-of-Order Observations", journal = j-TECS, volume = "22", number = "4", pages = "68:1--68:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3597209", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 10 07:21:24 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3597209", abstract = "Online fault diagnosis techniques are a key enabler of effective failure mitigation. For real-time systems, the problem of identifying faults is aggravated by timing imprecisions such as varying latency between events and their observation. This paper \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "68", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Terway:2023:RGM, author = "Prerit Terway and Niraj K. Jha", title = "{REPAIRS}: {Gaussian} Mixture Model-based Completion and Optimization of Partially Specified Systems", journal = j-TECS, volume = "22", number = "4", pages = "69:1--69:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3605147", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 10 07:21:24 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3605147", abstract = "Most system optimization techniques focus on finding the values of the system components to achieve the best performance. Searching over all component values gives the search methodology the freedom to explore the entire design space to determine the best \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "69", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hsu:2023:GBC, author = "Yao-Jen Hsu and Chin-Hsien Wu and Yu-Chieh Tsai and Chia-Cheng Liu", title = "A Granularity-Based Clustering Method for Reducing Write Amplification in Solid-State Drives", journal = j-TECS, volume = "22", number = "4", pages = "70:1--70:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3605779", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 10 07:21:24 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3605779", abstract = "In recent years, solid-state drives (SSDs) that adopt NAND flash memory have been widely used as the main storage devices. In particular, NAND flash memory has a special feature of ``out-of-place'' updates to write the up-to-date data to a free page, and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "70", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Niknafs:2023:RRM, author = "Mina Niknafs and Petru Eles and Zebo Peng", title = "Runtime Resource Management with Multiple-Step-Ahead Workload Prediction", journal = j-TECS, volume = "22", number = "4", pages = "71:1--71:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3605213", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 10 07:21:24 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3605213", abstract = "Modern embedded platforms need sophisticated resource managers to utilize their heterogeneous computational resources efficiently. Furthermore, such platforms are subject to fluctuating workloads that are unforeseeable at design time. 
Predicting the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "71", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Bosio:2023:SIA, author = "Alberto Bosio and Lara Dolecek and Alexandra Kourfali and Sri Parameswaran and Alessandro Savino", title = "Special Issue: {``Approximation at the Edge''}", journal = j-TECS, volume = "22", number = "4", pages = "72:1--72:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3605757", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 10 07:21:24 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3605757", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "72", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Pradhan:2023:ETB, author = "Chetana Pradhan and Martin Letras and J{\"u}rgen Teich", title = "Efficient Table-based Function Approximation on {FPGAs} Using Interval Splitting and {BRAM} Instantiation", journal = j-TECS, volume = "22", number = "4", pages = "73:1--73:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3580737", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 10 07:21:24 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/elefunt.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3580737", abstract = "This article proposes a novel approach for the generation of memory-efficient table-based function approximation circuits for edge devices in general and FPGAs in particular. 
Given a function $ f(x) $ to be approximated in a given interval $ [x_0, x_0 + a) $ and a maximum approximation error $ E_a $, the goal is to determine a function table implementation with a minimized memory footprint, i.e., number of entries that need to be stored. Rather than state-of-the-art work performing an equidistant sampling of the given interval by so-called breakpoints and using linear interpolation between two adjacent breakpoints to determine $ f(x) $ at the maximum error bound, we propose and compare three algorithms for splitting the given interval into sub-intervals to reduce the required memory footprint drastically based on the observation that in sub-intervals of low gradient, a coarser sampling grid may be assumed while guaranteeing the maximum interpolation error bound $ E_a $. Experiments on elementary mathematical functions show that a large fraction in memory footprint may be saved. Second, a hardware architecture implementing the sub-interval selection, breakpoint lookup, and interpolation at a latency of just 9 clock cycles is introduced. Third, for each generated circuit design, BRAMs are automatically instantiated rather than synthesizing the reduced footprint function table using LUT primitives, providing an additional degree of resource efficiency. The approach presented here for FPGAs can equally be applied to other circuit technologies for fast and, at the same time, memory-optimized function approximation at the edge.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "73", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Paul:2023:ANI, author = "Sibendu Paul and Utsav Drolia and Y. 
Charlie Hu and Srimat Chakradhar", title = "{AQuA}: a New Image Quality Metric for Optimizing Video Analytics Systems", journal = j-TECS, volume = "22", number = "4", pages = "74:1--74:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3568423", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 10 07:21:24 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3568423", abstract = "Millions of cameras at the edge are being deployed to power a variety of different deep learning applications. However, the frames captured by these cameras are not always pristine---they can be distorted due to lighting issues, sensor noise, compression \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "74", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Leon-Vega:2023:AGR, author = "Luis G. Le{\'o}n-Vega and Eduardo Salazar-Villalobos and Alejandro Rodriguez-Figueroa and Jorge Castro-God{\'\i}nez", title = "Automatic Generation of Resource and Accuracy Configurable Processing Elements", journal = j-TECS, volume = "22", number = "4", pages = "75:1--75:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3594540", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 10 07:21:24 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3594540", abstract = "Low-power consumption and scarce computational resources limit the computation at the edge. Besides, the approximate computing paradigm reports promising techniques for designing accelerators to deal with inherent limitations of the edge, and high-level \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "75", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Awais:2023:TOS, author = "Muhammad Awais and Ali Zahir and Syed Ayaz Ali Shah and Pedro Reviriego and Anees Ullah and Nasim Ullah and Adam Khan and Hazrat Ali", title = "Toward Optimal Softcore Carry-aware Approximate Multipliers on {Xilinx} {FPGAs}", journal = j-TECS, volume = "22", number = "4", pages = "76:1--76:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3564243", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 10 07:21:24 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3564243", abstract = "Domain-specific accelerators for signal processing, image processing, and machine learning are increasingly being implemented on SRAM-based field-programmable gate arrays (FPGAs). Owing to the inherent error tolerance of such applications, approximate arithmetic operations, in particular, the design of approximate multipliers, have become an important research problem. Truncation of lower bits is a widely used approximation approach; however, analyzing and limiting the effects of carry-propagation due to this approximation has not been explored in detail yet. In this article, an optimized carry-aware approximate radix-4 Booth multiplier design is presented that leverages the built-in slice look-up tables (LUTs) and carry-chain resources in a novel configuration. The proposed multiplier simplifies the computation of the upper and lower bits and provides significant benefits in terms of FPGA resource usage (LUTs saving 38.5\%--42.9\%), Power Delay Product (PDP saving 49.4\%--53\%), performance metric (LUTs $ \times $ critical path delay (CPD) $ \times $ PDP saving 68.9\%--73.1\%) and errors (70\% improvement in mean relative error distance) compared to the latest state-of-the-art designs. 
Therefore, the proposed designs are an attractive choice to implement multiplication on FPGA-based accelerators.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "76", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ghosh:2023:EEA, author = "Soumendu Kumar Ghosh and Arnab Raha and Vijay Raghunathan", title = "Energy-Efficient Approximate Edge Inference Systems", journal = j-TECS, volume = "22", number = "4", pages = "77:1--77:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3589766", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 10 07:21:24 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3589766", abstract = "The rapid proliferation of the Internet of Things and the dramatic resurgence of artificial intelligence based application workloads have led to immense interest in performing inference on energy-constrained edge devices. Approximate computing (a design \ldots{})", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "77", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Tsounis:2023:MFT, author = "Ioannis Tsounis and Dimitris Agiakatsikas and Mihalis Psarakis", title = "A Methodology for Fault-tolerant {Pareto}-optimal Approximate Designs of {FPGA}-based Accelerators", journal = j-TECS, volume = "22", number = "4", pages = "78:1--78:??", month = jul, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3568021", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Aug 10 07:21:24 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3568021", abstract = "Approximate Computing Techniques (ACTs) take advantage of resilience computing applications to trade off among output precision, area, power, and performance. ACTs can lead to significant gains at affordable costs when efficiently implemented on Field \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "78", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Pan:2023:BBS, author = "Yunjie Pan and Jiecao Yu and Andrew Lukefahr and Reetuparna Das and Scott Mahlke", title = "{BitSET}: Bit-Serial Early Termination for Computation Reduction in Convolutional Neural Networks", journal = j-TECS, volume = "22", number = "5s", pages = "98:1--98:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609093", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609093", abstract = "Convolutional Neural Networks (CNNs) have demonstrated remarkable performance across a wide range of machine learning tasks. 
However, the high accuracy \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "98", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Yang:2023:EEP, author = "Zhao Yang and Qingshuang Sun", title = "Energy-efficient Personalized Federated Search with Graph for Edge Computing", journal = j-TECS, volume = "22", number = "5s", pages = "99:1--99:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609435", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609435", abstract = "Federated Learning (FL) is a popular method for privacy-preserving machine learning on edge devices. However, the heterogeneity of edge devices, including differences \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "99", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wang:2023:EEM, author = "Yitu Wang and Shiyu Li and Qilin Zheng and Andrew Chang and Hai Li and Yiran Chen", title = "{EMS-i}: an Efficient Memory System Design with Specialized Caching Mechanism for Recommendation Inference", journal = j-TECS, volume = "22", number = "5s", pages = "100:1--100:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609384", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609384", abstract = "Recommendation systems have been widely embedded into many Internet services. For example, Meta's deep learning recommendation model (DLRM) \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Embed. Comput. Syst.", articleno = "100", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Sahoo:2023:ATS, author = "Siva Satyendra Sahoo and Salim Ullah and Akash Kumar", title = "{AxOTreeS}: a Tree Search Approach to Synthesizing {FPGA}-based Approximate Operators", journal = j-TECS, volume = "22", number = "5s", pages = "101:1--101:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609096", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609096", abstract = "Approximate computing (AxC) provides the scope for achieving disproportionate gains in a system's power, performance, and area (PPA) metrics by leveraging an \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "101", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Afifi:2023:GGN, author = "Salma Afifi and Febin Sunny and Amin Shafiee and Mahdi Nikdast and Sudeep Pasricha", title = "{GHOST}: a Graph Neural Network Accelerator using Silicon Photonics", journal = j-TECS, volume = "22", number = "5s", pages = "102:1--102:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609097", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609097", abstract = "Graph neural networks (GNNs) have emerged as a powerful approach for modelling and learning from graph-structured data. Multiple fields have since \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "102", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ren:2023:PWB, author = "Jiankang Ren and Chunxiao Liu and Chi Lin and Ran Bi and Simeng Li and Zheng Wang and Yicheng Qian and Zhichao Zhao and Guozhen Tan", title = "Protection Window Based Security-Aware Scheduling against Schedule-Based Attacks", journal = j-TECS, volume = "22", number = "5s", pages = "103:1--103:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609098", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609098", abstract = "With widespread use of common-off-the-shelf components and the drive towards connection with external environments, the real-time systems are facing more \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "103", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Sha:2023:PSR, author = "Zhibing Sha and Jiaojiao Wu and Jun Li and Balazs Gerofi and Zhigang Cai and Jianwei Liao", title = "Proactive Stripe Reconstruction to Improve Cache Use Efficiency of {SSD}-Based {RAID} Systems", journal = j-TECS, volume = "22", number = "5s", pages = "104:1--104:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609099", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609099", abstract = "Solid-State Drives (SSDs) exhibit different failure characteristics compared to conventional hard disk drives. 
In particular, the Bit Error Rate (BER) of an SSD \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "104", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mousavi:2023:DDA, author = "Hamid Mousavi and Mohammad Loni and Mina Alibeigi and Masoud Daneshtalab", title = "{DASS}: Differentiable Architecture Search for Sparse Neural Networks", journal = j-TECS, volume = "22", number = "5s", pages = "105:1--105:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609385", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609385", abstract = "The deployment of Deep Neural Networks (DNNs) on edge devices is hindered by the substantial gap between performance requirements and available \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "105", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Clair:2023:SED, author = "Judicael Clair and Guy Eichler and Luca P. Carloni", title = "{SpikeHard}: Efficiency-Driven Neuromorphic Hardware for Heterogeneous Systems-on-Chip", journal = j-TECS, volume = "22", number = "5s", pages = "106:1--106:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609101", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609101", abstract = "Neuromorphic computing is an emerging field with the potential to offer performance and energy-efficiency gains over traditional machine learning approaches. 
\ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "106", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Klashtorny:2023:PGW, author = "Artem Klashtorny and Zhuanhao Wu and Anirudh Mohan Kaushik and Hiren Patel", title = "Predictable {GPU} Wavefront Splitting for Safety-Critical Systems", journal = j-TECS, volume = "22", number = "5s", pages = "107:1--107:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609102", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609102", abstract = "We present a predictable wavefront splitting (PWS) technique for graphics processing units (GPUs). PWS improves the performance of GPU applications by reducing \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "107", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Odema:2023:MMA, author = "Mohanad Odema and Halima Bouzidi and Hamza Ouarnoughi and Smail Niar and Mohammad Abdullah {Al Faruque}", title = "{MaGNAS}: a Mapping-Aware Graph Neural Architecture Search Framework for Heterogeneous {MPSoC} Deployment", journal = j-TECS, volume = "22", number = "5s", pages = "108:1--108:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609386", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609386", abstract = "Graph Neural Networks (GNNs) are becoming increasingly popular for vision-based applications due to their intrinsic capacity in modeling structural and contextual \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "108", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mondal:2023:PPR, author = "Anupam Mondal and Shreya Gangopadhyay and Durba Chatterjee and Harishma Boyapally and Debdeep Mukhopadhyay", title = "{PReFeR}: Physically Related Function based Remote Attestation Protocol", journal = j-TECS, volume = "22", number = "5s", pages = "109:1--109:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609104", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609104", abstract = "Remote attestation is a request-response based security service that permits a trusted entity (verifier) to check the current state of an untrusted remote \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "109", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ikeda:2023:MDD, author = "Sosei Ikeda and Hiromitsu Awano and Takashi Sato", title = "Modular {DFR}: Digital Delayed Feedback Reservoir Model for Enhancing Design Flexibility", journal = j-TECS, volume = "22", number = "5s", pages = "110:1--110:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609105", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609105", abstract = "A delayed feedback reservoir (DFR) is a type of reservoir computing system well-suited for hardware implementations owing to its simple structure. Most existing \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "110", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mishra:2023:VVA, author = "Vishesh Mishra and Sparsh Mittal and Neelofar Hassan and Rekha Singhal and Urbi Chatterjee", title = "{VADF}: Versatile Approximate Data Formats for Energy-Efficient Computing", journal = j-TECS, volume = "22", number = "5s", pages = "111:1--111:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609106", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609106", abstract = "Approximate computing (AC) techniques provide overall performance gains in terms of power and energy savings at the cost of minor loss in application accuracy. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "111", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Halder:2023:OPN, author = "Dipal Halder and Maneesh Merugu and Sandip Ray", title = "{ObNoCs}: Protecting Network-on-Chip Fabrics Against Reverse-Engineering Attacks", journal = j-TECS, volume = "22", number = "5s", pages = "112:1--112:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609107", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609107", abstract = "Modern System-on-Chip designs typically use Network-on-Chip (NoC) fabrics to implement coordination among integrated hardware blocks. An important \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "112", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Basaklar:2023:DDT, author = "Toygun Basaklar and A. Alper Goksoy and Anish Krishnakumar and Suat Gumussoy and Umit Y. Ogras", title = "{DTRL}: Decision Tree-based Multi-Objective Reinforcement Learning for Runtime Task Scheduling in Domain-Specific System-on-Chips", journal = j-TECS, volume = "22", number = "5s", pages = "113:1--113:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609108", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609108", abstract = "Domain-specific systems-on-chip (DSSoCs) combine general-purpose processors and specialized hardware accelerators to improve performance and energy efficiency \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "113", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Juang:2023:LCG, author = "Tzung-Han Juang and Christof Schlaak and Christophe Dubach", title = "Let Coarse-Grained Resources Be Shared: Mapping Entire Neural Networks on {FPGAs}", journal = j-TECS, volume = "22", number = "5s", pages = "114:1--114:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609109", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609109", abstract = "Traditional High-Level Synthesis (HLS) provides rapid prototyping of hardware accelerators without coding with Hardware Description Languages (HDLs). \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "114", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Bakshi:2023:CED, author = "Suyash Bakshi and Lennart Johnsson", title = "Computationally Efficient {DNN} Mapping Search Heuristic using Deep Reinforcement Learning", journal = j-TECS, volume = "22", number = "5s", pages = "115:1--115:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609110", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609110", abstract = "In this work, we present a computationally efficient Reinforcement Learning mapping search heuristic for finding high quality mappings for N-dimensional \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "115", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hussein:2023:CNC, author = "Dina Hussein and Ganapati Bhat", title = "{CIM}: a Novel Clustering-based Energy-Efficient Data Imputation Method for Human Activity Recognition", journal = j-TECS, volume = "22", number = "5s", pages = "116:1--116:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609111", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609111", abstract = "Human activity recognition (HAR) is an important component in a number of health applications, including rehabilitation, Parkinson's disease, daily activity \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "116", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ravi:2023:VLV, author = "Akshara Ravi and Vivek Chaturvedi and Muhammad Shafique", title = "{ViT4Mal}: Lightweight Vision Transformer for Malware Detection on Edge Devices", journal = j-TECS, volume = "22", number = "5s", pages = "117:1--117:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609112", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609112", abstract = "There has been a tremendous growth of edge devices connected to the network in recent years. Although these devices make our life simpler and smarter, they \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "117", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Deb:2023:ZDT, author = "Dipika Deb and John Jose", title = "{ZPP}: a Dynamic Technique to Eliminate Cache Pollution in {NoC} based {MPSoCs}", journal = j-TECS, volume = "22", number = "5s", pages = "118:1--118:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609113", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609113", abstract = "Data prefetching efficiently reduces the memory access latency in NUCA architectures as the Last Level Cache (LLC) is shared and distributed across \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "118", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wu:2023:WTM, author = "Shin-Ting Wu and Liang-Chi Chen and Po-Chun Huang and Yuan-Hao Chang and Chien-Chung Ho and Wei-Kuan Shih", title = "{WARM}-tree: Making Quadtrees Write-efficient and Space-economic on Persistent Memories", journal = j-TECS, volume = "22", number = "5s", pages = "119:1--119:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3608033", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3608033", abstract = "Recently, the value of data has been widely recognized, which highlights the significance of data-centric computing in diversified application scenarios. In many \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "119", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Shen:2023:TMS, author = "Yixian Shen and Leo Schreuders and Anuj Pathania and Andy D. Pimentel", title = "Thermal Management for {$3$D}-Stacked Systems via Unified Core-Memory Power Regulation", journal = j-TECS, volume = "22", number = "5s", pages = "120:1--120:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3608040", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3608040", abstract = "3D-stacked processor-memory systems stack memory (DRAM banks) directly on top of logic (CPU cores) using chiplet-on-chiplet packaging technology to provide \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "120", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ponzina:2023:OFC, author = "Flavio Ponzina and Marco Rios and Alexandre Levisse and Giovanni Ansaloni and David Atienza", title = "Overflow-free Compute Memories for Edge {AI} Acceleration", journal = j-TECS, volume = "22", number = "5s", pages = "121:1--121:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609387", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609387", abstract = "Compute memories are memory arrays augmented with dedicated logic to support arithmetic. They support the efficient execution of data-centric \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "121", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Vali:2023:BSD, author = "Kourosh Vali and Ata Vafi and Begum Kasap and Soheil Ghiasi", title = "{BASS}: Safe Deep Tissue Optical Sensing for Wearable Embedded Systems", journal = j-TECS, volume = "22", number = "5s", pages = "122:1--122:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3607916", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3607916", abstract = "In wearable optical sensing applications whose target tissue is not superficial, such as deep tissue oximetry, the task of embedded system design has to strike a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "122", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Huai:2023:CCR, author = "Shuo Huai and Hao Kong and Xiangzhong Luo and Shiqing Li and Ravi Subramaniam and Christian Makaya and Qian Lin and Weichen Liu", title = "{CRIMP}: Compact \& Reliable {DNN} Inference on In-Memory Processing via Crossbar-Aligned Compression and Non-ideality Adaptation", journal = j-TECS, volume = "22", number = "5s", pages = "123:1--123:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609115", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609115", abstract = "Crossbar-based In-Memory Processing (IMP) accelerators have been widely adopted to achieve high-speed and low-power computing, especially for deep \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "123", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Yen:2023:KBR, author = "Chih-Hsuan Yen and Hashan Roshantha Mendis and Tei-Wei Kuo and Pi-Cheng Hsiu", title = "Keep in Balance: Runtime-reconfigurable Intermittent Deep Inference", journal = j-TECS, volume = "22", number = "5s", pages = "124:1--124:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3607918", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3607918", abstract = "Intermittent deep neural network (DNN) inference is a promising technique to enable intelligent applications on tiny devices powered by ambient energy \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Embed. Comput. Syst.", articleno = "124", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Gufran:2023:FHR, author = "Danish Gufran and Sudeep Pasricha", title = "{FedHIL}: Heterogeneity Resilient Federated Learning for Robust Indoor Localization with Mobile Devices", journal = j-TECS, volume = "22", number = "5s", pages = "125:1--125:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3607919", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3607919", abstract = "Indoor localization plays a vital role in applications such as emergency response, warehouse management, and augmented reality experiences. By deploying \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "125", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Xia:2023:SPS, author = "Chengpeng Xia and Yawen Chen and Haibo Zhang and Jigang Wu", title = "{STADIA}: Photonic Stochastic Gradient Descent for Neural Network Accelerators", journal = j-TECS, volume = "22", number = "5s", pages = "126:1--126:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3607920", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3607920", abstract = "Deep Neural Networks (DNNs) have demonstrated great success in many fields such as image recognition and text analysis. However, the ever-increasing sizes \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "126", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chang:2023:LEL, author = "Jung-Hsiu Chang and Tzu-Yu Chang and Yi-Chao Shih and Tseng-Yi Chen", title = "{LaDy}: Enabling Locality-aware Deduplication Technology on Shingled Magnetic Recording Drives", journal = j-TECS, volume = "22", number = "5s", pages = "127:1--127:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3607921", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3607921", abstract = "The continuous increase in data volume has led to the adoption of shingled-magnetic recording (SMR) as the primary technology for modern storage drives. This \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "127", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lien:2023:FFS, author = "Yi-Han Lien and Yen-Ting Chen and Yuan-Hao Chang and Yu-Pei Liang and Wei-Kuan Shih", title = "{FSIMR}: File-system-aware Data Management for Interlaced Magnetic Recording", journal = j-TECS, volume = "22", number = "5s", pages = "128:1--128:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3607922", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3607922", abstract = "Interlaced Magnetic Recording (IMR) is an emerging recording technology for hard-disk drives (HDDs) that provides larger storage capacity at a lower cost. By \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "128", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Li:2023:IIE, author = "Wentong Li and Liang Shi and Hang Li and Changlong Li and Edwin Hsing-Mean Sha", title = "{IOSR}: Improving {I/O} Efficiency for Memory Swapping on Mobile Devices Via Scheduling and Reshaping", journal = j-TECS, volume = "22", number = "5s", pages = "129:1--129:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3607923", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3607923", abstract = "Mobile systems and applications are becoming increasingly feature-rich and powerful, which constantly suffer from memory pressure, especially for \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "129", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Modi:2023:CRR, author = "Garima Modi and Aritra Bagchi and Neetu Jindal and Ayan Mandal and Preeti Ranjan Panda", title = "{CABARRE}: Request Response Arbitration for Shared Cache Management", journal = j-TECS, volume = "22", number = "5s", pages = "130:1--130:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3608096", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3608096", abstract = "Modern multi-processor systems-on-chip (MPSoCs) are characterized by caches shared by multiple cores. These shared caches receive requests issued by the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "130", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ahmed:2023:SAH, author = "Soyed Tuhin Ahmed and Kamal Danouchi and Michael Hefenbrock and Guillaume Prenat and Lorena Anghel and Mehdi B. Tahoori", title = "{SpinBayes}: Algorithm-Hardware Co-Design for Uncertainty Estimation Using {Bayesian} In-Memory Approximation on Spintronic-Based Architectures", journal = j-TECS, volume = "22", number = "5s", pages = "131:1--131:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609116", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609116", abstract = "Recent development in neural networks (NNs) has led to their widespread use in critical and automated decision-making systems, where uncertainty \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "131", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Sharma:2023:FCD, author = "Harsh Sharma and Lukas Pfromm and Rasit Onur Topaloglu and Janardhan Rao Doppa and Umit Y. 
Ogras and Ananth Kalyanaraman and Partha Pratim Pande", title = "Florets for Chiplets: Data Flow-aware High-Performance and Energy-efficient Network-on-Interposer for {CNN} Inference Tasks", journal = j-TECS, volume = "22", number = "5s", pages = "132:1--132:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3608098", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3608098", abstract = "Recent advances in 2.5D chiplet platforms provide a new avenue for compact scale-out implementations of emerging compute- and data-intensive applications \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "132", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Nassar:2023:APM, author = "Hassan Nassar and Lars Bauer and J{\"o}rg Henkel", title = "{ANV-PUF}: Machine-Learning-Resilient {NVM}-Based Arbiter {PUF}", journal = j-TECS, volume = "22", number = "5s", pages = "133:1--133:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609388", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609388", abstract = "Physical Unclonable Functions (PUFs) have been widely considered an attractive security primitive. They use the deviations in the fabrication process to have unique \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "133", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Sorrentino:2023:HCA, author = "Giuseppe Sorrentino and Marco Venere and Davide Conficconi and Eleonora D'Arnese and Marco Domenico Santambrogio", title = "{Hephaestus}: Codesigning and Automating {$3$D} Image Registration on Reconfigurable Architectures", journal = j-TECS, volume = "22", number = "5s", pages = "134:1--134:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3607928", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3607928", abstract = "Healthcare is a pivotal research field, and medical imaging is crucial in many applications. Therefore finding new architectural and algorithmic \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "134", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Tuncel:2023:SSC, author = "Yigit Tuncel and Toygun Basaklar and Dina Carpenter-Graffy and Umit Ogras", title = "A Self-Sustained {CPS} Design for Reliable Wildfire Monitoring", journal = j-TECS, volume = "22", number = "5s", pages = "135:1--135:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3608100", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3608100", abstract = "Continuous monitoring of areas nearby the electric grid is critical for preventing and early detection of devastating wildfires. Existing wildfire monitoring \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "135", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lohar:2023:SMF, author = "Debasmita Lohar and Clothilde Jeangoudoux and Anastasia Volkova and Eva Darulova", title = "Sound Mixed Fixed-Point Quantization of Neural Networks", journal = j-TECS, volume = "22", number = "5s", pages = "136:1--136:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609118", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609118", abstract = "Neural networks are increasingly being used as components in safety-critical applications, for instance, as controllers in embedded systems. Their formal \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "136", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Bourke:2023:VCS, author = "Timothy Bourke and Basile Pesin and Marc Pouzet", title = "Verified Compilation of Synchronous Dataflow with State Machines", journal = j-TECS, volume = "22", number = "5s", pages = "137:1--137:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3608102", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3608102", abstract = "Safety-critical embedded software is routinely programmed in block-diagram languages. Recent work in the V{\'e}lus project specifies such a language and its \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "137", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lee:2023:CVA, author = "Edward A. Lee and Ravi Akella and Soroush Bateni and Shaokai Lin and Marten Lohstroh and Christian Menard", title = "Consistency vs. Availability in Distributed Cyber-Physical Systems", journal = j-TECS, volume = "22", number = "5s", pages = "138:1--138:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609119", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609119", abstract = "In distributed applications, Brewer's CAP theorem tells us that when networks become partitioned (P), one must give up either consistency (C) or availability (A). \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "138", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Peeck:2023:IWC, author = "Jonas Peeck and Rolf Ernst", title = "Improving Worst-case {TSN} Communication Times of Large Sensor Data Samples by Exploiting Synchronization", journal = j-TECS, volume = "22", number = "5s", pages = "139:1--139:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609120", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609120", abstract = "Higher levels of automated driving also require a more sophisticated environmental perception. Therefore, an increasing number of sensors transmit their data \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "139", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chou:2023:RSK, author = "Yi-Quan Chou and Lin-Wei Shen and Li-Pin Chang", title = "Rectifying Skewed Kernel Page Reclamation in Mobile Devices for Improving User-Perceivable Latency", journal = j-TECS, volume = "22", number = "5s", pages = "140:1--140:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3607937", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3607937", abstract = "A crucial design factor for users of smart mobile devices is the latency of graphical interface interaction. Switching a background app to foreground is a frequent \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "140", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Majumdar:2023:NAB, author = "Rupak Majumdar and Mahmoud Salamati and Sadegh Soudjani", title = "Neural Abstraction-Based Controller Synthesis and Deployment", journal = j-TECS, volume = "22", number = "5s", pages = "141:1--141:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3608104", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3608104", abstract = "Abstraction-based techniques are an attractive approach for synthesizing correct-by-construction controllers to satisfy high-level temporal requirements. A \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "141", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Khan:2023:DDL, author = "Osama Khan and Gwanjong Park and Euiseong Seo", title = "{DaCapo}: an On-Device Learning Scheme for Memory-Constrained Embedded Systems", journal = j-TECS, volume = "22", number = "5s", pages = "142:1--142:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609121", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609121", abstract = "The use of deep neural network (DNN) applications in microcontroller unit (MCU) embedded systems is getting popular. However, the DNN models in such \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "142", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Gunzel:2023:PRT, author = "Mario G{\"u}nzel and Niklas Ueter and Kuan-Hsun Chen and Georg von der Br{\"u}ggen and Jian-Jia Chen", title = "Probabilistic Reaction Time Analysis", journal = j-TECS, volume = "22", number = "5s", pages = "143:1--143:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609390", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609390", abstract = "In many embedded systems, for instance, in the automotive, avionic, or robotics domain, critical functionalities are implemented via chains of communicating recurrent \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "143", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Vreman:2023:SAC, author = "Nils Vreman and Martina Maggio", title = "Stochastic Analysis of Control Systems Subject to Communication and Computation Faults", journal = j-TECS, volume = "22", number = "5s", pages = "144:1--144:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609123", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609123", abstract = "Control theory allows one to design controllers that are robust to external disturbances, model simplification, and modelling inaccuracy. Researchers \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "144", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zheng:2023:IIA, author = "Yongchun Zheng and Changlong Li and Yi Xiong and Weihong Liu and Cheng Ji and Zongwei Zhu and Lichen Yu", title = "{iAware}: Interaction Aware Task Scheduling for Reducing Resource Contention in Mobile Systems", journal = j-TECS, volume = "22", number = "5s", pages = "145:1--145:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609391", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609391", abstract = "To ensure the user experience of mobile systems, the foreground application can be differentiated to minimize the impact of background applications. However, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "145", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhao:2023:FSN, author = "Hanrui Zhao and Niuniu Qi and Lydia Dehbi and Xia Zeng and Zhengfeng Yang", title = "Formal Synthesis of Neural Barrier Certificates for Continuous Systems via Counterexample Guided Learning", journal = j-TECS, volume = "22", number = "5s", pages = "146:1--146:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609125", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609125", abstract = "This paper presents a novel approach to safety verification based on neural barrier certificates synthesis for continuous dynamical systems. We construct \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "146", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Loveless:2023:CML, author = "Andrew Loveless and Linh Thi Xuan Phan and Lisa Erickson and Ronald Dreslinski and Baris Kasikci", title = "{CrossTalk}: Making Low-Latency Fault Tolerance Cheap by Exploiting Redundant Networks", journal = j-TECS, volume = "22", number = "5s", pages = "147:1--147:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609436", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609436", abstract = "Real-time embedded systems perform many important functions in the modern world. A standard way to tolerate faults in these systems is with Byzantine \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "147", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Shijubo:2023:PBB, author = "Junya Shijubo and Masaki Waga and Kohei Suenaga", title = "Probabilistic Black-Box Checking via Active {MDP} Learning", journal = j-TECS, volume = "22", number = "5s", pages = "148:1--148:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609127", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609127", abstract = "We introduce a novel methodology for testing stochastic black-box systems, frequently encountered in embedded systems. Our approach enhances \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "148", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Singh:2023:KWC, author = "Nikhilesh Singh and Karthikeyan Renganathan and Chester Rebeiro and Jithin Jose and Ralph Mader", title = "{Kryptonite}: Worst-Case Program Interference Estimation on Multi-Core Embedded Systems", journal = j-TECS, volume = "22", number = "5s", pages = "149:1--149:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609128", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609128", abstract = "Due to the low costs and energy needed, cyber-physical systems are adopting multi-core processors for their embedded computing requirements. In order \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "149", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Baruah:2023:OSR, author = "Sanjoy Baruah and Alan Burns and Robert Ian Davis", title = "Optimal Synthesis of Robust {IDK} Classifier Cascades", journal = j-TECS, volume = "22", number = "5s", pages = "150:1--150:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609129", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609129", abstract = "An IDK classifier is a computing component that categorizes inputs into one of a number of classes, if it is able to do so with the required level of confidence, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "150", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Brun:2023:EDA, author = "L{\'e}lio Brun and Christophe Garion and Pierre-Lo{\"\i}c Garoche and Xavier Thirioux", title = "Equation-Directed Axiomatization of {Lustre} Semantics to Enable Optimized Code Validation", journal = j-TECS, volume = "22", number = "5s", pages = "151:1--151:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609393", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609393", abstract = "Model-based design tools like SCADE Suite and Simulink are often used to design safety-critical embedded software. Consequently, generating correct \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "151", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Colaco:2023:CSB, author = "Jean-Louis Cola{\c{c}}o and Michael Mendler and Baptiste Pauget and Marc Pouzet", title = "A Constructive State-based Semantics and Interpreter for a Synchronous Data-flow Language with State Machines", journal = j-TECS, volume = "22", number = "5s", pages = "152:1--152:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609131", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609131", abstract = "Scade is a domain-specific synchronous functional language used to implement safety-critical real-time software for more than twenty years. Two main \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "152", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Thilakasiri:2023:MRP, author = "Thilanka Thilakasiri and Matthias Becker", title = "Methods to Realize Preemption in Phased Execution Models", journal = j-TECS, volume = "22", number = "5s", pages = "153:1--153:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609132", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609132", abstract = "Phased execution models are a good solution to tame the increased complexity and contention of commercial off-the-shelf (COTS) multi-core platforms, e.g., \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "153", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Szeto:2023:BAB, author = "Matthew Szeto and Edward Andert and Aviral Shrivastava and Martin Reisslein and Chung-Wei Lin and Christ Richmond", title = "{B-AWARE}: Blockage Aware {RSU} Scheduling for {5G} Enabled Autonomous Vehicles", journal = j-TECS, volume = "22", number = "5s", pages = "154:1--154:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609133", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609133", abstract = "5G Millimeter Wave (mmWave) technology holds great promise for Connected Autonomous Vehicles (CAVs) due to its ability to achieve data rates in the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "154", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lin:2023:TBV, author = "Shaokai Lin and Yatin A. Manerkar and Marten Lohstroh and Elizabeth Polgreen and Sheng-Jung Yu and Chadlia Jerad and Edward A. Lee and Sanjit A. Seshia", title = "Towards Building Verifiable {CPS} using {Lingua Franca}", journal = j-TECS, volume = "22", number = "5s", pages = "155:1--155:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609134", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609134", abstract = "Formal verification of cyber-physical systems (CPS) is challenging because it has to consider real-time and concurrency aspects that are often absent in \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Embed. Comput. Syst.", articleno = "155", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Bartocci:2023:MHU, author = "Ezio Bartocci and Cristinel Mateis and Eleonora Nesterini and Dejan Ni{\v{c}}kovi{\'c}", title = "Mining Hyperproperties using Temporal Logics", journal = j-TECS, volume = "22", number = "5s", pages = "156:1--156:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609394", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Sep 18 08:59:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609394", abstract = "Formal specifications are essential to express precisely systems, but they are often difficult to define or unavailable. Specification mining aims to automatically infer \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "156", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Metz:2023:BBS, author = "David Metz and Vineet Kumar and Magnus Sj{\"a}lander", title = "{BISDU}: a Bit-Serial Dot-Product Unit for Microcontrollers", journal = j-TECS, volume = "22", number = "5", pages = "79:1--79:??", month = sep, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3608447", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 2 15:31:58 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib; https://www.math.utah.edu/pub/tex/bib/risc-v.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3608447", abstract = "Low-precision quantized neural networks (QNNs) reduce the required memory space, bandwidth, and computational power, and hence are suitable for deployment in applications such as IoT edge devices. 
Mixed-precision QNNs, where weights commonly have lower precision than activations or different precision is used for different layers, can limit the accuracy loss caused by low-bit quantization, while still benefiting from reduced memory footprint and faster execution. Previous multiple-precision functional units supporting 8-bit, 4-bit, and 2-bit SIMD instructions have limitations, such as large area overhead, under-utilization of multipliers, and wasted memory space for low and mixed bit-width operations.\par This article introduces BISDU, a bit-serial dot-product unit to support and accelerate execution of mixed-precision low-bit QNNs on resource-constrained microcontrollers. BISDU is a multiplier-less dot-product unit, with frugal hardware requirements (a population count unit and 2:1 multiplexers). The proposed bit-serial dot-product unit leverages the conventional logical operations of a microcontroller to perform multiplications, which enables efficient software implementations of binary (Xnor), ternary (Xor), and mixed-precision [W $ \times $ A] (And) dot-product operations.\par The experimental results show that BISDU achieves competitive performance compared to two state-of-the-art units, XpulpNN and Dustin, when executing low-bit-width CNNs. We demonstrate the advantage that bit-serial execution provides by enabling trading accuracy against weight footprint and execution time. BISDU increases the area of the ALU by 68\% and the ALU power consumption by 42\% compared to a baseline 32-bit RISC-V (RV32IC) microcontroller core. In comparison, XpulpNN and Dustin increase the area by 6.9$ \times $ and 11.1$ \times $ and the power consumption by 3.8$ \times $ and 5.97$ \times $, respectively. The bit-serial state-of-the-art, based on a conventional popcount instruction, increases the area by 42\% and power by 32\%, with BISDU providing a 37\% speedup over it.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "79", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kwon:2023:LRT, author = "Hyeokdong Kwon and Hyunjun Kim and Minjoo Sim and Wai-Kong Lee and Hwajeong Seo", title = "Look-up the Rainbow: Table-based Implementation of Rainbow Signature on 64-bit {ARMv8} Processors", journal = j-TECS, volume = "22", number = "5", pages = "80:1--80:??", month = sep, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3607140", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 2 15:31:58 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3607140", abstract = "The Rainbow Signature Scheme is one of the finalists in the National Institute of Standards and Technology (NIST) Post-Quantum Cryptography (PQC) standardization competition, but failed to win because it has lack of stability in the parameter selection \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "80", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Schneider:2023:CCM, author = "Klaus Schneider and Anoop Bhagyanath", title = "Consistency Constraints for Mapping Dataflow Graphs to Hybrid Dataflow\slash {von Neumann} Architectures", journal = j-TECS, volume = "22", number = "5", pages = "81:1--81:??", month = sep, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3607869", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 2 15:31:58 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3607869", abstract = "Dataflow process networks (DPNs) provide a convenient model of computation that is often used to model system behavior in model-based designs. 
With fixed sets of nodes, they are also used as dataflow graphs as an intermediate program representation by \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "81", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Caronti:2023:FGH, author = "Luca Caronti and Khakim Akhunov and Matteo Nardello and Kasim Sinan Yildirim and Davide Brunelli", title = "Fine-grained Hardware Acceleration for Efficient Batteryless Intermittent Inference on the Edge", journal = j-TECS, volume = "22", number = "5", pages = "82:1--82:??", month = sep, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3608475", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 2 15:31:58 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3608475", abstract = "Backing up the intermediate results of hardware-accelerated deep inference is crucial to ensure the progress of execution on batteryless computing platforms. However, hardware accelerators in low-power AI platforms only support the one-shot atomic \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "82", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lei:2023:FII, author = "Douwei Lei and Debiao He and Cong Peng and Min Luo and Zhe Liu and Xinyi Huang", title = "Faster Implementation of Ideal Lattice-Based Cryptography Using {AVX512}", journal = j-TECS, volume = "22", number = "5", pages = "83:1--83:??", month = sep, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609223", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 2 15:31:58 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609223", abstract = "With the development of quantum computing, the existing cryptography schemes based on classical cryptographic primitives will no longer be secure. Hence, cryptographers are designing post-quantum cryptographic (PQC) schemes, and ideal lattice-based \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "83", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chen:2023:RCR, author = "Wei-Ju Chen and Peng Wu and Pei-Chi Huang and Aloysius K. 
Mok and Song Han", title = "Regular Composite Resource Partitioning and Reconfiguration in Open Systems", journal = j-TECS, volume = "22", number = "5", pages = "84:1--84:??", month = sep, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609424", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 2 15:31:58 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609424", abstract = "We consider the problem of resource provisioning for real-time cyber-physical applications in an open system environment where there does not exist a global resource scheduler that has complete knowledge of the real-time performance requirements of each \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "84", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Inagaki:2023:PSC, author = "Saya Inagaki and Mingyu Yang and Yang Li and Kazuo Sakiyama and Yuko Hara-Azumi", title = "Power Side-channel Attack Resistant Circuit Designs of {ARX} Ciphers Using High-level Synthesis", journal = j-TECS, volume = "22", number = "5", pages = "85:1--85:??", month = sep, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609507", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 2 15:31:58 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609507", abstract = "In the Internet of Things (IoT) era, edge devices have been considerably diversified and are often designed using high-level synthesis (HLS) for improved design productivity. However, HLS tools were originally developed in a security-unaware manner, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "85", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Luo:2023:SEF, author = "Yuling Luo and Shiqi Zhang and Shunsheng Zhang and Junxiu Liu and Yanhu Wang and Su Yang", title = "A Secure and Efficient Framework for Outsourcing Large-scale Matrix Determinant and Linear Equations", journal = j-TECS, volume = "22", number = "5", pages = "86:1--86:??", month = sep, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3611014", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 2 15:31:58 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3611014", abstract = "Large-scale matrix determinants and linear equations are two basic computational tools in science and engineering fields. However, it is difficult for a resource-constrained client to solve large-scale computational tasks. Cloud computing service provides \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "86", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Calsi:2023:IRA, author = "Davide {Li Calsi} and Vittorio Zaccaria", title = "Interruptible Remote Attestation of Low-end {IoT} Microcontrollers via Performance Counters", journal = j-TECS, volume = "22", number = "5", pages = "87:1--87:??", month = sep, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3611674", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 2 15:31:58 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3611674", abstract = "Remote attestation is a method used in distributed systems to detect integrity violations on a target device (prover) through a challenge-response protocol initiated by a verifier device.
The prover calculates a hash of its memory, which is compared to a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "87", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Indrusiak:2023:RTG, author = "Leandro Soares Indrusiak and Alan Burns", title = "Real-Time Guarantees in Routerless Networks-on-Chip", journal = j-TECS, volume = "22", number = "5", pages = "88:1--88:??", month = sep, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3616539", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 2 15:31:58 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3616539", abstract = "This article considers the use of routerless networks-on-chip as an alternative on-chip interconnect for multi-processor systems requiring hard real-time guarantees for inter-processor communication. It presents a novel analytical framework that can \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "88", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Liang:2023:SIA, author = "Yun (Eric) Liang and Wei Zhang and Stephen Neuendorffer and Wayne Luk", title = "Special Issue: {``AI Acceleration on FPGAs''}", journal = j-TECS, volume = "22", number = "6", pages = "89:1--89:??", month = nov, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3626323", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Feb 3 11:10:46 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3626323", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "89", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hu:2023:HPR, author = "Xianghong Hu and Hongmin Huang and Xueming Li and Xin Zheng and Qinyuan Ren and Jingyu He and Xiaoming Xiong", title = "High-performance Reconfigurable {DNN} Accelerator on a Bandwidth-limited Embedded System", journal = j-TECS, volume = "22", number = "6", pages = "90:1--90:??", month = nov, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3530818", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Feb 3 11:10:46 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3530818", abstract = "Deep convolutional neural networks (DNNs) have been widely used in many applications, particularly in machine vision. It is challenging to accelerate DNNs on embedded systems because real-world machine vision applications should reserve a lot of external \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "90", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wang:2023:FCF, author = "Xiaoyang Wang and Zhe Zhou and Zhihang Yuan and Jingchen Zhu and Yulong Cao and Yao Zhang and Kangrui Sun and Guangyu Sun", title = "{FD-CNN}: a Frequency-Domain {FPGA} Acceleration Scheme for {CNN}-Based Image-Processing Applications", journal = j-TECS, volume = "22", number = "6", pages = "91:1--91:??", month = nov, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3559105", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Feb 3 11:10:46 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3559105", abstract = "In the emerging edge-computing scenarios, FPGAs have been widely adopted to accelerate convolutional neural network (CNN)-based image-processing applications, such as image classification, object detection, and image segmentation, and so on. A standard \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "91", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ma:2023:ICD, author = "Zhengzheng Ma and Tuo Dai and Xuechao Wei and Guojie Luo", title = "An Intermediate-Centric Dataflow for Transposed Convolution Acceleration on {FPGA}", journal = j-TECS, volume = "22", number = "6", pages = "92:1--92:??", month = nov, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3561053", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Feb 3 11:10:46 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3561053", abstract = "Transposed convolution has been prevailing in convolutional neural networks (CNNs), playing an important role in multiple scenarios such as image segmentation and back-propagation process of training CNNs. This mainly benefits from the ability to up- \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "92", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ye:2023:AAM, author = "Wenhua Ye and Xu Zhou and Joey Zhou and Cen Chen and Kenli Li", title = "Accelerating Attention Mechanism on {FPGAs} based on Efficient Reconfigurable Systolic Array", journal = j-TECS, volume = "22", number = "6", pages = "93:1--93:??", month = nov, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3549937", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Sat Feb 3 11:10:46 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3549937", abstract = "Transformer model architectures have recently received great interest in natural language, machine translation, and computer vision, where attention mechanisms are their building blocks.
However, the attention mechanism is expensive because of its \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "93",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Alam:2023:RIF,
  author =       "Syed Asad Alam and David Gregg and Giulio Gambardella and Thomas Preusser and Michaela Blott",
  title =        "On the {RTL} Implementation of {FINN} Matrix Vector Unit",
  journal =      j-TECS,
  volume =       "22",
  number =       "6",
  pages =        "94:1--94:??",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3547141",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Feb 3 11:10:46 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3547141",
  abstract =     "Field-programmable gate array (FPGA)-based accelerators are becoming increasingly popular for deep neural network (DNN) inference due to their ability to scale performance with increasing degrees of specialization with dataflow architectures or custom \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "94",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Feng:2023:ADS,
  author =       "Kaijie Feng and Xiaoya Fan and Jianfeng An and Chuxi Li and Kaiyue Di and Jiangfei Li",
  title =        "{ACDSE}: a Design Space Exploration Method for {CNN} Accelerator based on Adaptive Compression Mechanism",
  journal =      j-TECS,
  volume =       "22",
  number =       "6",
  pages =        "95:1--95:??",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3545177",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Feb 3 11:10:46 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3545177",
  abstract =     "Customized accelerators for Convolutional Neural Network (CNN) can achieve better energy efficiency than general computing platforms. However, the design of a high-performance accelerator should take into account a variety of parameters and physical \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "95",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Shu:2023:TID,
  author =       "Jiwu Shu and Kedong Fang and Youmin Chen and Shuo Wang",
  title =        "{TH-iSSD}: Design and Implementation of a Generic and Reconfigurable Near-Data Processing Framework",
  journal =      j-TECS,
  volume =       "22",
  number =       "6",
  pages =        "96:1--96:??",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3563456",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Feb 3 11:10:46 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3563456",
  abstract =     "We present the design and implementation of TH-iSSD, a near-data processing framework to address the data movement problem.
TH-iSSD does not pose any restriction to the hardware selection and is highly reconfigurable---its core components, such as the on- \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "96",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Fu:2023:RRB,
  author =       "Yu Fu and Jingqiang Lin and Dengguo Feng and Wei Wang and Mingyu Wang and Wenjie Wang",
  title =        "{RegKey}: a Register-based Implementation of {ECC} Signature Algorithms Against One-shot Memory Disclosure",
  journal =      j-TECS,
  volume =       "22",
  number =       "6",
  pages =        "97:1--97:??",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3604805",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Feb 3 11:10:46 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3604805",
  abstract =     "To ensure the security of cryptographic algorithm implementations, several cryptographic key protection schemes have been proposed to prevent various memory disclosure attacks. Among them, the register-based solutions do not rely on special hardware \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "97",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Min:2023:SBM,
  author =       "Chulhong Min and Akhil Mathur and Utku G{\"u}nay Acer and Alessandro Montanari and Fahim Kawsar",
  title =        "{SensiX++}: Bringing {MLOps} and Multi-tenant Model Serving to Sensory Edge Devices",
  journal =      j-TECS,
  volume =       "22",
  number =       "6",
  pages =        "98:1--98:??",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3617507",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Feb 3 11:10:46 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3617507",
  abstract =     "We present SensiX++, a multi-tenant runtime for adaptive model execution with integrated MLOps on edge devices, e.g., a camera, a microphone, or IoT sensors. SensiX++ operates on two fundamental principles: highly modular componentisation to externalise \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "98",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{ElYaacoub:2023:SDS,
  author =       "Ahmed {El Yaacoub} and Luca Mottola and Thiemo Voigt and Philipp R{\"u}mmer",
  title =        "Scheduling Dynamic Software Updates in Mobile Robots",
  journal =      j-TECS,
  volume =       "22",
  number =       "6",
  pages =        "99:1--99:??",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3623676",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Feb 3 11:10:46 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3623676",
  abstract =     "We present NeRTA (Next Release Time Analysis), a technique to enable dynamic software updates for low-level control software of mobile robots.
Dynamic software updates enable software correction and evolution during system operation. In mobile robotics, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "99",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Samaddar:2023:ODS,
  author =       "Ankita Samaddar and Arvind Easwaran",
  title =        "Online Distributed Schedule Randomization to Mitigate Timing Attacks in Industrial Control Systems",
  journal =      j-TECS,
  volume =       "22",
  number =       "6",
  pages =        "100:1--100:??",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3624584",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Feb 3 11:10:46 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3624584",
  abstract =     "Industrial control systems (ICSs) consist of a large number of control applications that are associated with periodic real-time flows with hard deadlines. To facilitate large-scale integration, remote control, and co-ordination, wireless sensor and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "100",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wu:2023:SFA,
  author =       "Jun-Shen Wu and Tsen-Wei Hsu and Ren-Shuo Liu",
  title =        "{SG-Float}: Achieving Memory Access and Computing Power Reduction Using Self-Gating Float in {CNNs}",
  journal =      j-TECS,
  volume =       "22",
  number =       "6",
  pages =        "101:1--101:??",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3624582",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Feb 3 11:10:46 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3624582",
  abstract =     "Convolutional neural networks (CNNs) are essential for advancing the field of artificial intelligence. However, since these networks are highly demanding in terms of memory and computation, implementing CNNs can be challenging. To make CNNs more \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "101",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Hung:2023:EEC,
  author =       "Chen-Tui Hung and Kai Xuan Lee and Yi-Zheng Liu and Ya-Shu Chen and Zhong-Han Chan",
  title =        "Energy-Efficient Communications for Improving Timely Progress of Intermittent-Powered {BLE} Devices",
  journal =      j-TECS,
  volume =       "22",
  number =       "6",
  pages =        "102:1--102:??",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3626197",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Feb 3 11:10:46 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3626197",
  abstract =     "Battery-less devices offer potential solutions for maintaining sustainable Internet of Things (IoT) networks.
However, limited energy harvesting capacity can lead to power failures, limiting the system's quality of service (QoS). To improve timely task \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "102",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Javadi:2023:CME,
  author =       "Mohammad Haji Seyed Javadi and Mohsen Faryabi and Hamid Reza Mahdiani",
  title =        "A Comprehensive Model for Efficient Design Space Exploration of Imprecise Computational Blocks",
  journal =      j-TECS,
  volume =       "22",
  number =       "6",
  pages =        "103:1--103:??",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3625555",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Feb 3 11:10:46 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3625555",
  abstract =     "After almost a decade of research, development of more efficient imprecise computational blocks is still a major concern in imprecise computing domain. There are many instances of the introduced imprecise components of different types, while their main \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "103",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Siddhu:2023:DTM,
  author =       "Lokesh Siddhu and Aritra Bagchi and Rajesh Kedia and Isaar Ahmad and Shailja Pandey and Preeti Ranjan Panda",
  title =        "Dynamic Thermal Management of {$3$D} Memory through Rotating Low Power States and Partial Channel Closure",
  journal =      j-TECS,
  volume =       "22",
  number =       "6",
  pages =        "104:1--104:??",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3624581",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Feb 3 11:10:46 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3624581",
  abstract =     "Modern high-performance and high-bandwidth three-dimensional (3D) memories are characterized by frequent heating. Prior art suggests turning off hot channels and migrating data to the background DDR memory, incurring significant performance and energy \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "104",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wang:2023:EBN,
  author =       "Erwei Wang and James J. Davis and Daniele Moro and Piotr Zielinski and Jia Jie Lim and Claudionor Coelho and Satrajit Chatterjee and Peter Y. K. Cheung and George A.
Constantinides",
  title =        "Enabling Binary Neural Network Training on the Edge",
  journal =      j-TECS,
  volume =       "22",
  number =       "6",
  pages =        "105:1--105:??",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3626100",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Feb 3 11:10:46 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3626100",
  abstract =     "The ever-growing computational demands of increasingly complex machine learning models frequently necessitate the use of powerful cloud-based infrastructure for their training. Binary neural networks are known to be promising candidates for on-device \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "105",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Farahmand:2023:DAH,
  author =       "Ebrahim Farahmand and Ali Mahani and Muhammad Abdullah Hanif and Muhammad Shafique",
  title =        "Design and Analysis of High Performance Heterogeneous Block-based Approximate Adders",
  journal =      j-TECS,
  volume =       "22",
  number =       "6",
  pages =        "106:1--106:??",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3625686",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Feb 3 11:10:46 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3625686",
  abstract =     "Approximate computing is an emerging paradigm to improve the power and performance efficiency of error-resilient applications. As adders are one of the key components in almost all processing systems, a significant amount of research has been carried out \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "106",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Casini:2024:ISI,
  author =       "Daniel Casini and Dakshina Dasari and Matthias Becker and Giorgio Buttazzo",
  title =        "Introduction to the Special Issue on Real-Time Computing in the {IoT}-to-Edge-to-Cloud Continuum",
  journal =      j-TECS,
  volume =       "23",
  number =       "1",
  pages =        "1:1--1:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3605180",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Feb 3 11:10:48 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3605180",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "1",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chen:2024:DTO,
  author =       "Ying Chen and Jie Zhao and Jintao Hu and Shaohua Wan and Jiwei Huang",
  title =        "Distributed Task Offloading and Resource Purchasing in {NOMA-Enabled} Mobile Edge Computing: Hierarchical Game Theoretical Approaches",
  journal =      j-TECS,
  volume =       "23",
  number =       "1",
  pages =        "2:1--2:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3597023",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Feb 3 11:10:48 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3597023",
  abstract =     "As the computing resources and the battery capacity of mobile devices are usually limited, it is a feasible solution to offload the computation-intensive tasks generated by mobile devices to edge servers (ESs) in mobile edge computing (MEC). In this \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "2",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Cucinotta:2024:MCO,
  author =       "Tommaso Cucinotta and Alexandre Amory and Gabriele Ara and Francesco Paladino and Marco {Di Natale}",
  title =        "Multi-criteria Optimization of Real-time {DAGs} on Heterogeneous Platforms under {P-EDF}",
  journal =      j-TECS,
  volume =       "23",
  number =       "1",
  pages =        "3:1--3:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3592609",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Feb 3 11:10:48 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3592609",
  abstract =     "This article tackles the problem of optimal placement of complex real-time embedded applications on heterogeneous platforms. Applications are composed of directed acyclic graphs of tasks, with each directed-acyclic-graph (DAG) having a minimum inter- \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "3",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Struhar:2024:HRO,
  author =       "V{\'a}clav Struh{\'a}r and Silviu S. Craciunas and Mohammad Ashjaei and Moris Behnam and Alessandro V.
Papadopoulos",
  title =        "Hierarchical Resource Orchestration Framework for Real-time Containers",
  journal =      j-TECS,
  volume =       "23",
  number =       "1",
  pages =        "4:1--4:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3592856",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Feb 3 11:10:48 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3592856",
  abstract =     "Container-based virtualization is a promising deployment model in fog and edge computing applications, because it allows a seamless co-existence of virtualized applications in a heterogeneous environment without introducing significant overhead. Certain \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "4",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Barletta:2024:CAM,
  author =       "Marco Barletta and Marcello Cinque and Luigi {De Simone} and Raffaele {Della Corte}",
  title =        "Criticality-aware Monitoring and Orchestration for Containerized Industry 4.0 Environments",
  journal =      j-TECS,
  volume =       "23",
  number =       "1",
  pages =        "5:1--5:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3604567",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Feb 3 11:10:48 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3604567",
  abstract =     "The evolution of industrial environments makes the reconfigurability and flexibility key requirements to rapidly adapt to changeable market needs. Computing paradigms like Edge/Fog computing are able to provide the required flexibility and scalability \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "5",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ghosh:2024:PPA,
  author =       "Soumendu Kumar Ghosh and Arnab Raha and Vijay Raghunathan and Anand Raghunathan",
  title =        "{PArtNNer}: Platform-Agnostic Adaptive Edge-Cloud {DNN} Partitioning for Minimizing End-to-End Latency",
  journal =      j-TECS,
  volume =       "23",
  number =       "1",
  pages =        "6:1--6:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3630266",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Feb 3 11:10:48 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3630266",
  abstract =     "The last decade has seen the emergence of Deep Neural Networks (DNNs) as the de facto algorithm for various computer vision applications. In intelligent edge devices, sensor data streams acquired by the device are processed by a DNN application running on \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "6",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Xu:2024:SLB,
  author =       "Haitao Xu and Saiyu Qi and Yong Qi and Wei Wei and Naixue Xiong",
  title =        "Secure and Lightweight Blockchain-based Truthful Data Trading for Real-Time Vehicular Crowdsensing",
  journal =      j-TECS,
  volume =       "23",
  number =       "1",
  pages =        "7:1--7:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3582008",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Feb 3 11:10:48 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3582008",
  abstract =     "As the number of smart cars grows rapidly, vehicular crowdsensing (VCS) is gradually becoming popular.
In a VCS infrastructure, sensing devices and computing units hold on smart cars as well as cloud servers form an IoT-edge-cloud continuum to perform \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "7",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Oza:2024:DAT,
  author =       "Pratham Oza and Nathaniel Hudson and Thidapat Chantem and Hana Khamfroush",
  title =        "Deadline-Aware Task Offloading for Vehicular Edge Computing Networks Using Traffic Light Data",
  journal =      j-TECS,
  volume =       "23",
  number =       "1",
  pages =        "8:1--8:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3594541",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Feb 3 11:10:48 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3594541",
  abstract =     "As vehicles have become increasingly automated, novel vehicular applications have emerged to enhance the safety and security of the vehicles and improve user experience. This brings ever-increasing data and resource requirements for timely computation by \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "8",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Gaitan:2024:MOC,
  author =       "Miguel Guti{\'e}rrez Gait{\'a}n and Lu{\'\i}s Almeida and Pedro M. D'orey and Pedro M.
Santos and Thomas Watteyne",
  title =        "Minimal-Overlap Centrality for Multi-Gateway Designation in Real-Time {TSCH} Networks",
  journal =      j-TECS,
  volume =       "23",
  number =       "1",
  pages =        "9:1--9:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3610583",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Feb 3 11:10:48 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3610583",
  abstract =     "This article presents a novel centrality-driven gateway designation framework for the improved real-time performance of low-power wireless sensor networks (WSNs) at system design time. We target time-synchronized channel hopping (TSCH) WSNs with \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "9",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Iyer:2024:HCM,
  author =       "Vishnuvardhan V. Iyer and Aditya Thimmaiah and Michael Orshansky and Andreas Gerstlauer and Ali E. Yilmaz",
  title =        "A Hierarchical Classification Method for High-accuracy Instruction Disassembly with Near-field {EM} Measurements",
  journal =      j-TECS,
  volume =       "23",
  number =       "1",
  pages =        "10:1--10:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3629167",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Feb 3 11:10:48 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3629167",
  abstract =     "Electromagnetic (EM) fields have been extensively studied as potent side-channel tools for testing the security of hardware implementations. In this work, a low-cost side-channel disassembler that uses fine-grained EM signals to predict a program's \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "10",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zhang:2024:EAA,
  author =       "Yi-Wen Zhang and Hui Zheng and Zonghua Gu",
  title =        "Energy-Aware Adaptive Mixed-Criticality Scheduling with Semi-Clairvoyance and Graceful Degradation",
  journal =      j-TECS,
  volume =       "23",
  number =       "1",
  pages =        "11:1--11:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3632749",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Feb 3 11:10:48 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3632749",
  abstract =     "The classic Mixed-Criticality System (MCS) task model is a non-clairvoyance model in which the change of the system behavior is based on the completion of high-criticality tasks while dropping low-criticality tasks in high-criticality mode. In this paper, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "11",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Bagchi:2024:CCA,
  author =       "Aritra Bagchi and Dinesh Joshi and Preeti Ranjan Panda",
  title =        "{COBRRA}: {COntention-aware} cache Bypass with Request-Response Arbitration",
  journal =      j-TECS,
  volume =       "23",
  number =       "1",
  pages =        "12:1--12:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3632748",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Feb 3 11:10:48 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3632748",
  abstract =     "In modern multi-processor systems-on-chip (MPSoCs), requests from different processor cores, accelerators, and their responses from the lower-level memory contend for the shared cache bandwidth, making it a critical performance bottleneck.
Prior research \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "12",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Shin:2024:VEM,
  author =       "Yong-Jun Shin and Donghwan Shin and Doo-Hwan Bae",
  title =        "Virtual Environment Model Generation for {CPS} Goal Verification using Imitation Learning",
  journal =      j-TECS,
  volume =       "23",
  number =       "1",
  pages =        "13:1--13:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3633804",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Feb 3 11:10:48 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3633804",
  abstract =     "Cyber-Physical Systems (CPS) continuously interact with their physical environments through embedded software controllers that observe the environments and determine actions. Field Operational Tests (FOT) are essential to verify to what extent the CPS \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "13",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Yu:2024:MAE,
  author =       "Wangyang Yu and Jinming Kong and Zhijun Ding and Xiaojun Zhai and Zhiqiang Li and Qi Guo",
  title =        "Modeling and Analysis of {ETC} Control System with Colored {Petri} Net and Dynamic Slicing",
  journal =      j-TECS,
  volume =       "23",
  number =       "1",
  pages =        "14:1--14:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3633450",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Feb 3 11:10:48 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3633450",
  abstract =     "Nowadays, Electronic Toll Collection (ETC) control systems have been widely adopted to smoothen traffic flow on highways. However, as it is a complex business interaction system, there are inevitably flaws in its control logic process, such as the problem \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "14",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{He:2024:REA,
  author =       "Zhijian He and Bohuan Xue and Xiangcheng Hu and Zhaoyan Shen and Xiangyue Zeng and Ming Liu",
  title =        "Robust Embedded Autonomous Driving Positioning System Fusing {LiDAR} and Inertial Sensors",
  journal =      j-TECS,
  volume =       "23",
  number =       "1",
  pages =        "15:1--15:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3626098",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Feb 3 11:10:48 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3626098",
  abstract =     "Autonomous driving emphasizes precise multi-sensor fusion positioning on limit resource embedded systems.
LiDAR-centered sensor fusion system serves as a mainstream navigation system due to its insensitivity to illumination and viewpoint change. However, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "15",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Qi:2024:MCS,
  author =       "Huamei Qi and Fang Ren and Leilei Wang and Ping Jiang and Shaohua Wan and Xiaoheng Deng",
  title =        "Multi-Compression Scale {DNN} Inference Acceleration based on Cloud-Edge-End Collaboration",
  journal =      j-TECS,
  volume =       "23",
  number =       "1",
  pages =        "16:1--16:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3634704",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Feb 3 11:10:48 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3634704",
  abstract =     "Edge intelligence has emerged as a promising paradigm to accelerate DNN inference by model partitioning, which is particularly useful for intelligent scenarios that demand high accuracy and low latency. However, the dynamic nature of the edge environment \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "16",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Que:2024:LGL,
  author =       "Zhiqiang Que and Hongxiang Fan and Marcus Loo and He Li and Michaela Blott and Maurizio Pierini and Alexander Tapper and Wayne Luk",
  title =        "{LL-GNN}: Low Latency Graph Neural Networks on {FPGAs} for High Energy Physics",
  journal =      j-TECS,
  volume =       "23",
  number =       "2",
  pages =        "17:1--17:??",
  month =        mar,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3640464",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Apr 10 08:49:11 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3640464",
  abstract =     "This work presents a novel reconfigurable architecture for Low Latency Graph Neural Network (LL-GNN) designs for particle detectors, delivering unprecedented low latency performance. Incorporating FPGA-based GNNs into particle detectors presents a unique \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "17",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Alsubhi:2024:SFE,
  author =       "Arwa Alsubhi and Simeon Babatunde and Nicole Tobias and Jacob Sorber",
  title =        "{Stash}: Flexible Energy Storage for Intermittent Sensors",
  journal =      j-TECS,
  volume =       "23",
  number =       "2",
  pages =        "18:1--18:??",
  month =        mar,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3641511",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Apr 10 08:49:11 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3641511",
  abstract =     "Batteryless sensors promise a sustainable future for sensing, but they face significant challenges when storing and using environmental energy.
Incoming energy can fluctuate unpredictably between periods of scarcity and abundance, and device performance \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "18", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhao:2024:ICV, author = "Liang Zhao and Hongxuan Li and Enchao Zhang and Ammar Hawbani and Mingwei Lin and Shaohua Wan and Mohsen Guizani", title = "Intelligent Caching for Vehicular Dew Computing in Poor Network Connectivity Environments", journal = j-TECS, volume = "23", number = "2", pages = "19:1--19:??", month = mar, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3643038", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Apr 10 08:49:11 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3643038", abstract = "In vehicular networks, some edge servers may not function properly due to the time-varying load condition and the uneven computing resource distribution, resulting in a low quality of caching services. To overcome this challenge, we develop a Vehicular \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "19", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Sah:2024:ATE, author = "Ramesh Kumar Sah and Hassan Ghasemzadeh", title = "Adversarial Transferability in Embedded Sensor Systems: an Activity Recognition Perspective", journal = j-TECS, volume = "23", number = "2", pages = "20:1--20:??", month = mar, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3641861", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Apr 10 08:49:11 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3641861", abstract = "Machine learning algorithms are increasingly used for inference and decision-making in embedded systems. Data from sensors are used to train machine learning models for various smart functions of embedded and cyber-physical systems ranging from \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "20", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kornaros:2024:FUI, author = "George Kornaros and Svoronos Leivadaros and Filippos Kolimbianakis", title = "Flexible Updating of {Internet} of Things Computing Functions through Optimizing Dynamic Partial Reconfiguration", journal = j-TECS, volume = "23", number = "2", pages = "21:1--21:??", month = mar, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3643825", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Apr 10 08:49:11 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3643825", abstract = "With applications to become increasingly compute- and data-intensive, requiring more processing power, many Internet of Things (IoT) platforms in robots, drones, and autonomous vehicles that implement neural network inference, cryptographic functions or \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "21", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Fatnassi:2024:PNN, author = "Wael Fatnassi and Yasser Shoukry", title = "{PolyARBerNN}: a Neural Network Guided Solver and Optimizer for Bounded Polynomial Inequalities", journal = j-TECS, volume = "23", number = "2", pages = "22:1--22:??", month = mar, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3632970", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Apr 10 08:49:11 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3632970", abstract = "Constraints solvers play a significant role in the analysis, synthesis, and formal verification of complex cyber-physical systems. 
In this article, we study the problem of designing a scalable constraints solver for an important class of constraints named \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "22", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Li:2024:CIS, author = "Lu Li and Qi Tian and Guofeng Qin and Shuaiyu Chen and Weijia Wang", title = "Compact Instruction Set Extensions for {Dilithium}", journal = j-TECS, volume = "23", number = "2", pages = "23:1--23:??", month = mar, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3643826", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Apr 10 08:49:11 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3643826", abstract = "Post-quantum cryptography is considered to provide security against both traditional and quantum computer attacks. Dilithium is a digital signature algorithm that derives its security from the challenge of finding short vectors in lattices. It has been \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "23", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wu:2024:SGC, author = "Chin-Hsien Wu and Cheng-Tze Lee and Yi-Ren Tsai and Cheng-Yen Wu", title = "A Space-Grained Cleaning Method to Reduce Long-Tail Latency of {DM-SMR} Disks", journal = j-TECS, volume = "23", number = "2", pages = "24:1--24:??", month = mar, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3643827", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Apr 10 08:49:11 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3643827", abstract = "DM-SMR (device-managed shingled magnetic recording) disks allocate a portion of disk space as the persistent cache (PC) to address the issue of overlapping tracks during data updates. When the PC space becomes insufficient, a space cleaning is triggered \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "24", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Deng:2024:SST, author = "Jianing Deng and Shunjie Dong and Lvcheng Chen and Jingtong Hu and Cheng Zhuo", title = "{STDF}: Spatio-Temporal Deformable Fusion for Video Quality Enhancement on Embedded Platforms", journal = j-TECS, volume = "23", number = "2", pages = "25:1--25:??", month = mar, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3645113", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Apr 10 08:49:11 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3645113", abstract = "With the development of embedded systems and deep learning, it is feasible to combine them for offering various and convenient human-centered services, which is based on high-quality (HQ) videos. 
However, due to the limit of video traffic load and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "25", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Bhasin:2024:SIP, author = "Shivam Bhasin and Fabrizio {De Santis} and Francesco Regazzoni", title = "Special Issue on Post-Quantum Cryptography for Embedded Systems", journal = j-TECS, volume = "23", number = "2", pages = "26:1--26:??", month = mar, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3641852", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Apr 10 08:49:11 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3641852", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "26", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mujdei:2024:SCA, author = "Catinca Mujdei and Lennert Wouters and Angshuman Karmakar and Arthur Beckers and Jose Maria Bermudo Mera and Ingrid Verbauwhede", title = "Side-channel Analysis of Lattice-based Post-quantum Cryptography: Exploiting Polynomial Multiplication", journal = j-TECS, volume = "23", number = "2", pages = "27:1--27:??", month = mar, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3569420", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Apr 10 08:49:11 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3569420", abstract = "Polynomial multiplication algorithms such as Toom--Cook and the Number Theoretic Transform are fundamental building blocks for lattice-based post-quantum cryptography. 
In this work we present correlation power-analysis-based side-channel analysis \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "27", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Gupta:2024:MEE, author = "Saransh Gupta and Rosario Cammarota and Tajana Simuni{\'c}", title = "{MemFHE}: End-to-end Computing with Fully Homomorphic Encryption in Memory", journal = j-TECS, volume = "23", number = "2", pages = "28:1--28:??", month = mar, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3569955", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Apr 10 08:49:11 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3569955", abstract = "The increasing amount of data and the growing complexity of problems have resulted in an ever-growing reliance on cloud computing. However, many applications, most notably in healthcare, finance, or defense, demand security and privacy, which today's \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "28", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Thoma:2024:AAS, author = "Jan Philipp Thoma and Darius Hartlief and Tim G{\"u}neysu", title = "Agile Acceleration of Stateful Hash-based Signatures in Hardware", journal = j-TECS, volume = "23", number = "2", pages = "29:1--29:??", month = mar, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3567426", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Apr 10 08:49:11 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib; https://www.math.utah.edu/pub/tex/bib/hash.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3567426", abstract = "With the development of large-scale quantum computers, the current landscape of asymmetric cryptographic algorithms will change dramatically. Today's standards like RSA, DSA, and ElGamal will no longer provide sufficient security against quantum attackers \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "29", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Karl:2024:PQS, author = "Patrick Karl and Jonas Schupp and Tim Fritzmann and Georg Sigl", title = "Post-Quantum Signatures on {RISC-V} with Hardware Acceleration", journal = j-TECS, volume = "23", number = "2", pages = "30:1--30:??", month = mar, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3579092", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Apr 10 08:49:11 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib; https://www.math.utah.edu/pub/tex/bib/risc-v.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3579092", abstract = "CRYSTALS-Dilithium and Falcon are digital signature algorithms based on cryptographic lattices, which are considered secure even if large-scale quantum computers will be able to break conventional public-key cryptography. Both schemes have been selected \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "30", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Elkhatib:2024:CEF, author = "Rami Elkhatib and Brian Koziel and Reza Azarderakhsh and Mehran Mozaffari Kermani", title = "Cryptographic Engineering a Fast and Efficient {SIKE} in {FPGA}", journal = j-TECS, volume = "23", number = "2", pages = "31:1--31:??", month = mar, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3584919", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Apr 10 08:49:11 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3584919", abstract = "Recent attacks have shown that SIKE is not secure and should not be used in its current state. However, this work was completed before these attacks were discovered and might be beneficial to other cryptosystems such as SQISign. The primary downside of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "31", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Singh:2024:AEF, author = "Richa Singh and Saad Islam and Berk Sunar and Patrick Schaumont", title = "Analysis of {EM} Fault Injection on Bit-sliced Number Theoretic Transform Software in {Dilithium}", journal = j-TECS, volume = "23", number = "2", pages = "32:1--32:??", month = mar, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3583757", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Apr 10 08:49:11 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3583757", abstract = "Bitslicing is a software implementation technique that treats an $N$-bit processor datapath as $N$ parallel single-bit datapaths. Bitslicing is particularly useful to implement data-parallel algorithms, algorithms that apply the same operation sequence to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "32", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Jati:2024:CCK, author = "Arpan Jati and Naina Gupta and Anupam Chattopadhyay and Somitra Kumar Sanadhya", title = "A Configurable {CRYSTALS--Kyber} Hardware Implementation with Side-Channel Protection", journal = j-TECS, volume = "23", number = "2", pages = "33:1--33:??", month = mar, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3587037", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Apr 10 08:49:11 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3587037", abstract = "In this work, we present a configurable and side channel resistant implementation of the post-quantum key-exchange algorithm CRYSTALS-Kyber. The implemented design can be configured for different performance and area requirements leading to different \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "33", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Alnahawi:2024:TNG, author = "Nouri Alnahawi and Nicolai Schmitt and Alexander Wiesmaier and Chiara-Marie Zok", title = "Toward Next Generation Quantum-Safe {eIDs} and {eMRTDs}: a Survey", journal = j-TECS, volume = "23", number = "2", pages = "34:1--34:??", month = mar, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3585517", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Apr 10 08:49:11 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3585517", abstract = "Security mechanisms of Electronic Personal Documents (eCards) depend on (asymmetric) cryptography that is and always has been subject to the threat of compromise, be it from conventional attacks or quantum computers. With Post-Quantum Cryptography (PQC), \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "34", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ravi:2024:SCF, author = "Prasanna Ravi and Anupam Chattopadhyay and Jan Pieter D'Anvers and Anubhab Baksi", title = "Side-channel and Fault-injection attacks over Lattice-based Post-quantum Schemes ({Kyber}, {Dilithium}): Survey and New Results", journal = j-TECS, volume = "23", number = "2", pages = "35:1--35:??", month = mar, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3603170", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Apr 10 08:49:11 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3603170", abstract = "In this work, we present a systematic study of Side-Channel Attacks (SCA) and Fault Injection Attacks (FIA) on structured lattice-based schemes, with main focus on Kyber Key Encapsulation Mechanism (KEM) and Dilithium signature scheme, which are leading \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "35", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Niu:2024:EMF, author = "Linwei Niu and Danda B. 
Rawat and Dakai Zhu and Jonathan Musselwhite and Zonghua Gu and Qingxu Deng", title = "Energy Management for Fault-tolerant $ (m, k)$-constrained Real-time Systems That Use Standby-Sparing", journal = j-TECS, volume = "23", number = "3", pages = "36:1--36:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3648365", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue May 14 06:30:25 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3648365", abstract = "Fault tolerance, energy management, and quality of service (QoS) are essential aspects for the design of real-time embedded systems. In this work, we focus on exploring methods that can simultaneously address the above three critical issues under standby-sparing. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "36", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Li:2024:TEE, author = "Yueting Li and Xueyan Wang and He Zhang and Biao Pan and Keni Qiu and Wang Kang and Jun Wang and Weisheng Zhao", title = "Toward Energy-efficient {STT-MRAM}-based Near Memory Computing Architecture for Embedded Systems", journal = j-TECS, volume = "23", number = "3", pages = "37:1--37:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3650729", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue May 14 06:30:25 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3650729", abstract = "Convolutional Neural Networks (CNNs) have significantly impacted embedded system applications across various domains. However, this exacerbates the real-time processing and hardware resource-constrained challenges of embedded systems.
To tackle these \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "37", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Gealy:2024:CPS, author = "Calvin B. Gealy and Alan D. George", title = "Characterizing Parameter Scaling with Quantization for Deployment of {CNNs} on Real-Time Systems", journal = j-TECS, volume = "23", number = "3", pages = "38:1--38:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3654799", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue May 14 06:30:25 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3654799", abstract = "Modern deep-learning models tend to include billions of parameters, reducing real-time performance. Embedded systems are compute-constrained while frequently used to deploy these models for real-time systems given size, weight, and power requirements. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "38", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Salmon:2024:NCA, author = "Loic Salmon and Pierre-Yves Pillain and Goulven Guillou and Jean-Philippe Babau", title = "{NAVIDRO}, a {CARES} architectural style for configuring drone co-simulation", journal = j-TECS, volume = "23", number = "3", pages = "39:1--39:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3651889", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue May 14 06:30:25 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3651889", abstract = "One primary objective of drone simulation is to evaluate diverse drone configurations and contexts aligned with specific user objectives. 
The initial challenge for simulator designers involves managing the heterogeneity of drone components, encompassing \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "39", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Theocharides:2024:ISI, author = "Theocharis Theocharides and Charlotte Frenkel and Lukas Cavigelli", title = "Introduction to the Special Issue on {tinyML}", journal = j-TECS, volume = "23", number = "3", pages = "40:1--40:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3658375", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue May 14 06:30:25 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3658375", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "40", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lu:2024:EEE, author = "Qianyun Lu and Boris Murmann", title = "Enhancing the Energy Efficiency and Robustness of {tinyML} Computer Vision Using Coarsely-quantized Log-gradient Input Images", journal = j-TECS, volume = "23", number = "3", pages = "41:1--41:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3591466", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue May 14 06:30:25 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3591466", abstract = "This article studies the merits of applying log-gradient input images to convolutional neural networks (CNNs) for tinyML computer vision (CV). We show that log gradients enable: (i) aggressive 1-bit quantization of first-layer inputs, (ii) potential CNN \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "41", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Pavan:2024:TAD, author = "Massimo Pavan and Eugeniu Ostrovan and Armando Caltabiano and Manuel Roveri", title = "{TyBox}: an Automatic Design and Code Generation Toolbox for {TinyML} Incremental On-Device Learning", journal = j-TECS, volume = "23", number = "3", pages = "42:1--42:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3604566", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue May 14 06:30:25 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3604566", abstract = "Incremental on-device learning is one of the most relevant and interesting challenges in the field of Tiny Machine Learning (TinyML). Indeed, differently from traditional TinyML solutions where the training is typically carried out on the Cloud and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "42", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Saha:2024:TPA, author = "Swapnil Sayan Saha and Sandeep Singh Sandha and Mohit Aggarwal and Brian Wang and Liying Han and Julian {De Gortari Briseno} and Mani Srivastava", title = "{TinyNS}: Platform-aware Neurosymbolic Auto Tiny Machine Learning", journal = j-TECS, volume = "23", number = "3", pages = "43:1--43:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3603171", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue May 14 06:30:25 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3603171", abstract = "Machine learning at the extreme edge has enabled a plethora of intelligent, time-critical, and remote applications. 
However, deploying interpretable artificial intelligence systems that can perform high-level symbolic reasoning and satisfy the underlying \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "43", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mazumder:2024:RTR, author = "Arnab Neelim Mazumder and Farshad Safavi and Maryam Rahnemoonfar and Tinoosh Mohsenin", title = "{Reg-Tune}: a Regression-Focused Fine-Tuning Approach for Profiling Low Energy Consumption and Latency", journal = j-TECS, volume = "23", number = "3", pages = "44:1--44:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3623380", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue May 14 06:30:25 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3623380", abstract = "Fine-tuning deep neural networks is pivotal for creating inference modules that can be suitably imported to edge or field-programmable gate array (FPGA) platforms. Traditionally, exploration of different parameters throughout the layers of deep neural \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "44", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhang:2024:SBN, author = "Xinqiao Zhang and Mohammad Samragh and Siam Hussain and Ke Huang and Farinaz Koushanfar", title = "Scalable Binary Neural Network Applications in Oblivious Inference", journal = j-TECS, volume = "23", number = "3", pages = "45:1--45:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3607192", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue May 14 06:30:25 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3607192", abstract = "Binary neural network (BNN) delivers increased compute intensity and reduces memory/data requirements for computation. Scalable BNN enables inference in a limited time due to different constraints. This paper explores the application of Scalable BNN in \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "45", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Sridhar:2024:SSR, author = "Upasana Sridhar and Nicholai Tukanov and Elliott Binder and Tze Meng Low and Scott McMillan and Martin D. 
Schatz", title = "{SMaLL}: Software for Rapidly Instantiating Machine Learning Libraries", journal = j-TECS, volume = "23", number = "3", pages = "46:1--46:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3607870", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue May 14 06:30:25 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3607870", abstract = "Interest in deploying deep neural network (DNN) inference on edge devices has resulted in an explosion of the number and types of hardware platforms that machine learning (ML) libraries must support. High-level programming interfaces, such as TensorFlow, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "46", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Rashid:2024:TNV, author = "Hasib-Al Rashid and Utteja Kallakuri and Tinoosh Mohsenin", title = "{TinyM$^2$Net-V2}: a Compact Low-power Software Hardware Architecture for Multimodal Deep Neural Networks", journal = j-TECS, volume = "23", number = "3", pages = "47:1--47:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3595633", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue May 14 06:30:25 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3595633", abstract = "With the evaluation of Artificial Intelligence (AI), there has been a resurgence of interest in how to use AI algorithms on low-power embedded systems to broaden potential use cases of the Internet of Things (IoT). To mimic multimodal human perception, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "47", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Cilardo:2024:ASC, author = "Alessandro Cilardo and Vincenzo Maisto and Nicola Mazzocca and Franca Rocco {Di Torrepadula}", title = "An Approach to the Systematic Characterization of Multitask Accelerated {CNN} Inference in Edge {MPSoCs}", journal = j-TECS, volume = "23", number = "3", pages = "48:1--48:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3611015", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue May 14 06:30:25 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3611015", abstract = "Deep Learning is ubiquitous today and is increasingly moving from the cloud down to the edge of networked infrastructures, where it enables embedded applications to perform complex inference tasks close to the data sources, reducing long-distance data \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "48", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ancilotto:2024:XMM, author = "Alberto Ancilotto and Francesco Paissan and Elisabetta Farella", title = "{XimSwap}: Many-to-Many Face Swapping for {TinyML}", journal = j-TECS, volume = "23", number = "3", pages = "49:1--49:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3603173", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue May 14 06:30:25 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3603173", abstract = "The unprecedented development of deep learning approaches for video processing has caused growing privacy concerns. 
To ensure data analysis while maintaining privacy, it is essential to address how to protect individuals' identities. One solution is to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "49", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Andrade:2024:OPV, author = "Pedro Andrade and Ivanovitch Silva and Marianne Diniz and Thommas Flores and Daniel G. Costa and Eduardo Soares", title = "Online Processing of Vehicular Data on the Edge Through an Unsupervised {TinyML} Regression Technique", journal = j-TECS, volume = "23", number = "3", pages = "50:1--50:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3591356", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue May 14 06:30:25 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3591356", abstract = "The Internet of Things (IoT) has made it possible to include everyday objects in a connected network, allowing them to intelligently process data and respond to their environment. Thus, it is expected that those objects will gain an intelligent \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "50", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{ElAdoui:2024:CTM, author = "Mohammed {El Adoui} and Thomas Herpoel and Beno{\^\i}t Fr{\'e}nay", title = "Constrained Tiny Machine Learning for Predicting Gas Concentration with {I4.0} Low-cost Sensors", journal = j-TECS, volume = "23", number = "3", pages = "51:1--51:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3590956", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue May 14 06:30:25 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3590956", abstract = "Low-cost gas sensors (LCS) often produce inaccurate measurements due to varying environmental conditions that are not consistent with laboratory settings, leading to inadequate productivity levels compared to high-quality sensors. To address this issue, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "51", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Li:2024:IHA, author = "Chaojian Li and Kyungmin Kim and Bichen Wu and Peizhao Zhang and Hang Zhang and Xiaoliang Dai and Peter Vajda and Yingyan (Celine) Lin", title = "An Investigation on Hardware-Aware Vision Transformer Scaling", journal = j-TECS, volume = "23", number = "3", pages = "52:1--52:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3611387", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue May 14 06:30:25 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3611387", abstract = "Vision Transformer (ViT) has demonstrated promising performance in various computer vision tasks, and recently attracted a lot of research attention. 
Many recent works have focused on proposing new architectures to improve ViT and deploying it into real-world \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "52", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hussein:2024:SND, author = "Dina Hussein and Ganapati Bhat", title = "{SensorGAN}: a Novel Data Recovery Approach for Wearable Human Activity Recognition", journal = j-TECS, volume = "23", number = "3", pages = "53:1--53:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3609425", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue May 14 06:30:25 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3609425", abstract = "Human activity recognition (HAR) and, more broadly, activities of daily life recognition using wearable devices have the potential to transform a number of applications, including mobile healthcare, smart homes, and fitness monitoring. Recent approaches \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "53", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hoag:2024:RFM, author = "Ellis Hoag and Kyungwoo Lee and Julian Mestre and Sergey Pupyrev and Yongkang Zhu", title = "Reordering Functions in Mobiles Apps for Reduced Size and Faster Start-Up", journal = j-TECS, volume = "23", number = "4", pages = "54:1--54:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3660635", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:15 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3660635", abstract = "Function layout, also known as function reordering or function placement, is one of the most effective profile-guided compiler optimizations. By reordering functions in a binary, compilers can improve the performance of large-scale applications or reduce \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "54", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ren:2024:DOL, author = "Haoyu Ren and Darko Anicic and Xue Li and Thomas Runkler", title = "On-device Online Learning and Semantic Management of {TinyML} Systems", journal = j-TECS, volume = "23", number = "4", pages = "55:1--55:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3665278", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:15 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3665278", abstract = "Recent advances in Tiny Machine Learning (TinyML) empower low-footprint embedded devices for real-time on-device Machine Learning (ML). 
While many acknowledge the potential benefits of TinyML, its practical implementation presents unique challenges. This \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "55", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Bhade:2024:LHB, author = "Pavitra Bhade and Joseph Paturel and Olivier Sentieys and Sharad Sinha", title = "Lightweight Hardware-Based Cache Side-Channel Attack Detection for Edge Devices ({Edge-CaSCADe})", journal = j-TECS, volume = "23", number = "4", pages = "56:1--56:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3663673", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:15 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3663673", abstract = "Cache Side-Channel Attacks (CSCAs) have been haunting most processor architectures for decades now. Existing approaches to mitigation of such attacks have certain drawbacks, namely software mishandling, performance overhead, and low throughput due to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "56", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chielle:2024:CBM, author = "Eduardo Chielle and Oleg Mazonka and Homer Gamil and Michail Maniatakos", title = "Coupling Bit and Modular Arithmetic for Efficient General-Purpose Fully Homomorphic Encryption", journal = j-TECS, volume = "23", number = "4", pages = "57:1--57:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3665280", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:15 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3665280", abstract = "Fully Homomorphic Encryption (FHE) enables computation directly on encrypted data. This property is desirable for outsourced computation of sensitive data as it relies solely on the underlying security of the cryptosystem and not in access control \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "57", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Boudardara:2024:RAM, author = "Fateh Boudardara and Abderraouf Boussif and Pierre-Jean Meyer and Mohamed Ghazel", title = "A Review of Abstraction Methods Toward Verifying Neural Networks", journal = j-TECS, volume = "23", number = "4", pages = "58:1--58:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3617508", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:15 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3617508", abstract = "Neural networks as a machine learning technique are increasingly deployed in various domains.
Despite their performance and their continuous improvement, the deployment of neural networks in safety-critical systems, in particular for autonomous mobility, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "58", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ulus:2024:ETP, author = "Dogan Ulus and Thomas Ferr{\`e}re and Eugene Asarin and Dejan Nickovic and Oded Maler", title = "Elements of Timed Pattern Matching", journal = j-TECS, volume = "23", number = "4", pages = "59:1--59:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3645114", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:15 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/string-matching.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3645114", abstract = "The rise of machine learning and cloud technologies has led to a remarkable influx of data within modern cyber-physical systems. However, extracting meaningful information from this data has become a significant challenge due to its volume and complexity. Timed pattern matching has emerged as a powerful specification-based runtime verification and temporal data analysis technique to address this challenge.\par In this paper, we provide a comprehensive tutorial on timed pattern matching that ranges from the underlying algebra and pattern specification languages to performance analyses and practical case studies. Analogous to textual pattern matching, timed pattern matching is the task of finding all time periods within temporal behaviors of cyber-physical systems that match a predefined pattern. Originally we introduced and solved several variants of the problem using the name of match sets, which has evolved into the concept of timed relations over the past decade.
Here we first formalize and present the algebra of timed relations as a standalone mathematical tool to solve the pattern matching problem of timed pattern specifications. In particular, we show how to use the algebra of timed relations to solve the pattern matching problem for timed regular expressions and metric compass logic in a unified manner. We experimentally demonstrate that our timed pattern matching approach performs and scales well in practice. We further provide in-depth insights into the similarities and fundamental differences between monitoring and matching problems as well as regular expressions and temporal logic formulas. Finally, we illustrate the practical application of timed pattern matching through two case studies, which show how to extract structured information from temporal datasets obtained via simulations or real-world observations. These results and examples show that timed pattern matching is a rigorous and efficient technique in developing and analyzing cyber-physical systems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "59", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Panopoulos:2024:CCA, author = "Ioannis Panopoulos and Stylianos Venieris and Iakovos Venieris", title = "{CARIn}: Constraint-Aware and Responsive Inference on Heterogeneous Devices for Single- and Multi-{DNN} Workloads", journal = j-TECS, volume = "23", number = "4", pages = "60:1--60:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3665868", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:15 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3665868", abstract = "The relentless expansion of deep learning applications in recent years has prompted a pivotal shift toward on-device execution, driven by the urgent need for real-time processing, heightened privacy concerns, and reduced latency across diverse domains. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "60", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chakraborty:2024:TTA, author = "Shounak Chakraborty and Yanshul Sharma and Sanjay Moulik", title = "{TREAFET}: Temperature-Aware Real-Time Task Scheduling for {FinFET} based Multicores", journal = j-TECS, volume = "23", number = "4", pages = "61:1--61:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3665276", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:15 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3665276", abstract = "The recent shift in the VLSI industry from conventional MOSFET to FinFET for designing contemporary chip-multiprocessor (CMP) has noticeably improved hardware platforms' computing capabilities, but at the cost of several thermal issues. Unlike the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "61", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Algahtani:2024:HAA, author = "Eyad Algahtani", title = "A Hardware Approach For Accelerating Inductive Learning In Description Logic", journal = j-TECS, volume = "23", number = "4", pages = "62:1--62:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3665277", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:15 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3665277", abstract = "The employment of Machine Learning (ML) techniques in embedded systems has seen constant growth in recent years, especially for black-box ML techniques (such as Artificial Neural Networks (ANNs)). 
However, despite the successful employment of ML \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "62", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Pan:2024:MTR, author = "Yungang Pan and Rouhollah Mahfouzi and Soheil Samii and Petru Eles and Zebo Peng", title = "Multi-Traffic Resource Optimization for Real-Time Applications with {5G} Configured {Grant} Scheduling", journal = j-TECS, volume = "23", number = "4", pages = "63:1--63:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3664621", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:15 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3664621", abstract = "The fifth-generation (5G) technology standard in telecommunications is expected to support ultra-reliable low latency communication to enable real-time applications such as industrial automation and control. 5G configured grant (CG) scheduling features a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "63", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hada:2024:DCH, author = "Rupendra Pratap Singh Hada and Abhishek Srivastava", title = "Dynamic Cluster Head Selection in {WSN}", journal = j-TECS, volume = "23", number = "4", pages = "64:1--64:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3665867", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:15 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3665867", abstract = "A Wireless Sensor Network (WSN) comprises an ad-hoc network of nodes laden with sensors that are used to monitor a region mostly in the outdoors and often not easily accessible. Despite exceptions, several deployments of WSN continue to grapple with the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "64", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Deantoni:2024:ISI, author = "Julien Deantoni and Alain Girault and Daniel Grosse", title = "Introduction to the Special Issue on Specification and Design Languages ({FDL 2021})", journal = j-TECS, volume = "23", number = "5", pages = "65:1--65:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3677316", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:16 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3677316", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "65", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Grimm:2024:LGM, author = "Lena Grimm and Steven Smyth and Alexander Schulz-Rosengarten and Reinhard von Hanxleden and Marc Pouzet", title = "From {Lustre} to Graphical Models and {SCCharts}", journal = j-TECS, volume = "23", number = "5", pages = "66:1--66:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3544973", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:16 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3544973", abstract = "We introduce a systematic approach for automatically creating a visual diagram, akin to the graphical Safety Critical Application Development Environment (SCADE) model, from a Lustre program. This not only saves tedious manual drawing effort but also \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "66", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Goli:2024:ESI, author = "Mehran Goli and Rolf Drechsler", title = "Early {SoCs} Information Flow Policies Validation Using {SystemC}-Based Virtual Prototypes at the {ESL}", journal = j-TECS, volume = "23", number = "5", pages = "67:1--67:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3544780", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:16 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3544780", abstract = "Virtual Prototypes (VPs) at the Electronic System Level (ESL) are being increasingly adopted by the semiconductor industry and play an important role in modernizing the System-on-Chips (SoCs) design flow to raise design productivity and reduce time-to-market \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "67", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Benmaghnia:2024:CGN, author = "Hanane Benmaghnia and Matthieu Martel and Yassamine Seladji", title = "Code Generation for Neural Networks Based on Fixed-point Arithmetic", journal = j-TECS, volume = "23", number = "5", pages = "68:1--68:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3563945", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:16 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3563945", abstract = "Over the past few years, neural networks have started penetrating safety critical systems to make decisions as, for example, in robots, rockets, and autonomous driving cars.
Neural networks based on floating-point arithmetic are very time and memory consuming, which are not compatible with embedded systems known to have limited resources. They are also very sensitive to the precision in which they have been trained, so changing this precision generally degrades the quality of their answers. To deal with that, we introduce a new technique to generate a fixed-point code for a trained neural network. This technique is based on fixed-point arithmetic with mixed-precision. This arithmetic is based on integer operations only, which are compatible with small memory devices. The obtained neural network has the same behavior as the initial one (based on the floating-point arithmetic) up to an error threshold defined by the user. The experimental results show the efficiency of our tool SyFix in terms of memory saved and the accuracy of the computations.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "68", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hui:2024:SSM, author = "John Hui and Stephen A. Edwards", title = "The Sparse Synchronous Model on Real Hardware", journal = j-TECS, volume = "23", number = "5", pages = "69:1--69:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3572920", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:16 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3572920", abstract = "We present the Sparse Synchronous model (SSM) of computation, which allows a programmer to specify software timing more precisely than the traditional ``heartbeat'' of mainstream operating systems or the synchronous languages. SSM is a mix of semantics \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "69", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Aguado:2024:SSM, author = "Joaqu{\'\i}n Aguado and Alejandra Duenas", title = "Synchronised Shared Memory and Model Checking", journal = j-TECS, volume = "23", number = "5", pages = "70:1--70:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3626188", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:16 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3626188", abstract = "In this article, a formal generic framework for defining and reasoning about deterministic concurrency in synchronous systems is implemented in the Spin model checker. Concretely, the work implements the clock-synchronised shared memory (csm) theory, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "70", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Thuijsman:2024:SCD, author = "Sander Thuijsman and Michel Reniers", title = "Supervisory Control for Dynamic Feature Configuration in Product Lines", journal = j-TECS, volume = "23", number = "5", pages = "71:1--71:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3579644", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:16 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3579644", abstract = "In this paper a framework for engineering supervisory controllers for product lines with dynamic feature configuration is proposed. The variability in valid configurations is described by a feature model. Behavior of system components is achieved using \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. 
Comput. Syst.", articleno = "71", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Peres:2024:PTU, author = "Florent Peres and Mohamed Ghazel", title = "A Proven Translation from a {UML} State Machine Subset to Timed Automata", journal = j-TECS, volume = "23", number = "5", pages = "72:1--72:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3581771", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:16 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3581771", abstract = "Although Unified Modeling Language (UML) state machines constitute a convenient modeling formalism that is widely used in many applications, the lack of formal semantics impedes carrying out automatic processing, such as formal verification. In this \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "72", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Gauthier:2024:HRE, author = "Lovic Gauthier and Yohei Ishikawa", title = "{HDLRuby}: a {Ruby} Extension for Hardware Description and Its Translation to Synthesizable {Verilog HDL}", journal = j-TECS, volume = "23", number = "5", pages = "73:1--73:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3581757", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:16 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3581757", abstract = "HDLRuby is a new hardware description language defined as an extension of the Ruby programming language aiming to improve circuit design productivity. 
HDLRuby allows to model digital circuits at the register transfer level while supporting high-level \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "73", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lumpp:2024:DFB, author = "Francesco Lumpp and Marco Panato and Nicola Bombieri and Franco Fummi", title = "A Design Flow Based on {Docker} and {Kubernetes} for {ROS}-based Robotic Software Applications", journal = j-TECS, volume = "23", number = "5", pages = "74:1--74:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3594539", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:16 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3594539", abstract = "Human-centered robotic applications are becoming pervasive in the context of robotics and smart manufacturing, and such a pervasiveness is even more expected with the shift to Industry 5.0. The always increasing level of autonomy of modern robotic \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "74", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Arasteh:2024:FLT, author = "Emad M. 
Arasteh and Rainer D{\"o}mer", title = "Fast Loosely-Timed Deep Neural Network Models with Accurate Memory Contention", journal = j-TECS, volume = "23", number = "5", pages = "75:1--75:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3607548", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:16 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3607548", abstract = "The emergence of data-intensive applications, such as Deep Neural Networks (DNN), exacerbates the well-known memory bottleneck in computer systems and demands early attention in the design flow. Electronic System-Level (ESL) design using SystemC \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "75", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Gretz:2024:SSM, author = "Friedrich Gretz and Franz-Josef Grosch and Michael Mendler and Stephan Scheele", title = "Synchronized Shared Memory and Black-box Procedural Abstraction: Toward a Formal Semantics of Blech", journal = j-TECS, volume = "23", number = "5", pages = "76:1--76:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3571585", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:16 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3571585", abstract = "Traditional imperative synchronous programming languages heavily rely on a strict separation between data memory and communication signals. Signals can be shared between computational units but cannot be overwritten within a synchronous reaction cycle. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "76", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lohstroh:2024:DCA, author = "Marten Lohstroh and Soroush Bateni and Christian Menard and Alexander Schulz-Rosengarten and Jeronimo Castrillon and Edward A. Lee", title = "Deterministic Coordination across Multiple Timelines", journal = j-TECS, volume = "23", number = "5", pages = "77:1--77:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3615357", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:16 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3615357", abstract = "We discuss a novel approach for constructing deterministic reactive systems that revolves around a temporal model that incorporates a multiplicity of timelines. This model is central to Lingua Franca (LF), a polyglot coordination language and compiler \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "77", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Narang:2024:TTE, author = "Gaurav Narang and Chukwufumnanya Ogbogu and Janardhan Rao Doppa and Partha Pratim Pande", title = "{TEFLON}: Thermally Efficient Dataflow-aware {$3$D} {NoC} for Accelerating {CNN} Inferencing on Manycore {PIM} Architectures", journal = j-TECS, volume = "23", number = "5", pages = "78:1--78:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3665279", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:16 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3665279", abstract = "Resistive random-access memory (ReRAM)-based processing-in-memory (PIM) architectures are used extensively to accelerate inferencing/training with convolutional neural networks (CNNs). Three-dimensional (3D) integration is an enabling technology to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "78", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wang:2024:HSD, author = "Xingbin Wang and Boyan Zhao and Yulan Su and Sisi Zhang and Fengkai Yuan and Jun Zhang and Dan Meng and Rui Hou", title = "A Hybrid Sparse-dense Defensive {DNN} Accelerator Architecture against Adversarial Example Attacks", journal = j-TECS, volume = "23", number = "5", pages = "79:1--79:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3677318", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:16 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3677318", abstract = "Understanding how to defend against adversarial attacks is crucial for ensuring the safety and reliability of these systems in real-world applications. Various adversarial defense methods are proposed, which aim at improving the robustness of neural \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "79", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Dixit:2024:PPA, author = "Akanksha Dixit and Smruti R. 
Sarangi", title = "{PredATW}: Predicting the Asynchronous Time Warp Latency For {VR} Systems", journal = j-TECS, volume = "23", number = "5", pages = "80:1--80:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3677329", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:16 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3677329", abstract = "With the advent of low-power ultra-fast hardware and GPUs, virtual reality (VR) has gained a lot of prominence in the past few years and is being used in various areas, such as education, entertainment, scientific visualization, and computer-aided design. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "80", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Li:2024:DMD, author = "Chia-Hao Li and Niraj K. Jha", title = "{DOCTOR}: a Multi-Disease Detection Continual Learning Framework Based on Wearable Medical Sensors", journal = j-TECS, volume = "23", number = "5", pages = "81:1--81:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3679050", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:16 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3679050", abstract = "Modern advances in machine learning (ML) and wearable medical sensors (WMSs) in edge devices have enabled ML-driven disease detection for smart healthcare. Conventional ML-driven methods for disease detection rely on customizing individual models for each \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "81", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hafezan:2024:TFD, author = "Mohammad Hassan Hafezan and Ehsan Atoofian", title = "Transient Fault Detection in Tensor Cores for Modern {GPUs}", journal = j-TECS, volume = "23", number = "5", pages = "82:1--82:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3687483", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:16 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3687483", abstract = "Deep neural networks (DNNs) have emerged as an effective solution for many machine learning applications. However, the great success comes with the cost of excessive computation. The Volta graphics processing unit (GPU) from NVIDIA introduced a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "82", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wang:2024:LBR, author = "Meng Wang and Yiqin Lu and Haihan Wang and Zhuoxing Chen and Jiancheng Qin", title = "Load-balanced Routing Heuristics for Bandwidth Allocation of {AVB} Flow in {TSN}", journal = j-TECS, volume = "23", number = "5", pages = "83:1--83:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3687307", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:16 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3687307", abstract = "Time-Sensitive Networking (TSN) is a new technology developed from Ethernet that guarantees deterministic transmission of various types of flows, such as Time-triggered (TT) flows and Audio-video-bridging (AVB) flows, in the same network. Currently, Time-
\ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "83", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Gomez:2024:ODF, author = "Clara Gomez and Davron Patkhullaev and Alejandra C. Hernandez", title = "{OffloaD}: Detection Failure-based Scheduler for Offloading Object Detection", journal = j-TECS, volume = "23", number = "5", pages = "84:1--84:??", month = sep, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3677321", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:16 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3677321", abstract = "The current times ask for resource-constrained devices such as drones, light mobile robots, XR glasses, or mobile phones to perform object detection efficiently and in real time. However, when executed on the device, object detection fails to achieve the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "84", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Shi:2024:ISI, author = "Liang Shi and Jingtong Shi and Hussam Amrouch and Kuan-Hsun Chen and Mengying Zhao and Weichen Liu", title = "Introduction to Special Issue on In\slash Near Memory and Storage Computing for Embedded Systems", journal = j-TECS, volume = "23", number = "6", pages = "85:1--85:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3677018", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:17 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3677018", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "85", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Li:2024:ATV, author = "Fenfang Li and Huizhang Luo and Junqi Wang and Yida Li and Zhuo Tang and Kenli Li", title = "{AMP}: Total Variation Reduction for Lossless Compression via Approximate Median-based Preconditioning", journal = j-TECS, volume = "23", number = "6", pages = "86:1--86:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3605359", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:17 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3605359", abstract = "With the increasing scale of cloud computing applications of next-generation embedded systems, a major challenge that domain scientists are facing is how to efficiently store and analyze the vast volume of output data. Compression can reduce the amount of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "86", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ye:2024:HEA, author = "Chongnan Ye and Meng Chen and Qisheng Jiang and Chundong Wang", title = "{Hercules}: Enabling Atomic Durability for Persistent Memory with Transient Persistence Domain", journal = j-TECS, volume = "23", number = "6", pages = "87:1--87:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3607473", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:17 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3607473", abstract = "Persistent memory (pmem) products bring the persistence domain up to the memory level. 
Intel recently introduced the eADR feature that guarantees to flush data buffered in CPU cache to pmem on a power outage, thereby making the CPU cache a transient \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "87", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Du:2024:AMC, author = "Sichun Du and Jun Li and Chen Sun and Pingdan Xiao and Qinghui Hong and Jiliang Zhang", title = "Analog In-memory Circuit Design of Polynomial Multiplication for Lattice Cipher Acceleration Application", journal = j-TECS, volume = "23", number = "6", pages = "88:1--88:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3605891", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:17 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3605891", abstract = "As the core operation of lattice cipher, large-scale polynomial multiplication is the biggest computational bottleneck in its realization process. How to quickly calculate polynomial multiplication under resource constraints has become an urgent problem \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "88", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Gao:2024:SSW, author = "Xin Gao and Hongyue Wang and Yiyan Chen and Yuhao Zhang and Zhaoyan Shen and Lei Ju", title = "Static Scheduling of Weight Programming for {DNN} Acceleration with Resource Constrained {PIM}", journal = j-TECS, volume = "23", number = "6", pages = "89:1--89:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3615657", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:17 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3615657", abstract = "Most existing architectural studies on ReRAM-based processing-in-memory (PIM) DNN accelerators assume that all weights of the DNN can be mapped to the crossbar at once. However, these studies are over-idealized. ReRAM crossbar resources for calculation \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "89", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Bhunia:2024:RDS, author = "Kousik Bhunia and Arighna Deb and Kamalika Datta and Muhammad Hassan and Saeideh Shirinzadeh and Rolf Drechsler", title = "{ReSG}: a Data Structure for Verification of Majority-based In-memory Computing on {ReRAM} Crossbars", journal = j-TECS, volume = "23", number = "6", pages = "90:1--90:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3615358", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:17 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3615358", abstract = "Recent advancements in the fabrication of Resistive Random Access Memory (ReRAM) devices have led to the development of large-scale crossbar structures. In-memory computing architectures relying on ReRAM crossbars aim to mitigate the processor-memory \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "90", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Liang:2024:REE, author = "Dehua Liang and Hiromitsu Awano and Noriyuki Miura and Jun Shiomi", title = "A Robust and Energy Efficient Hyperdimensional Computing System for Voltage-scaled Circuits", journal = j-TECS, volume = "23", number = "6", pages = "91:1--91:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3620671", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:17 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3620671", abstract = "Voltage scaling is one of the most promising approaches for energy efficiency improvement but also brings challenges to fully guaranteeing stable operation in modern VLSI. To tackle such issues, we further extend the DependableHD to the second version \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "91", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Byun:2024:AMB, author = "Hongsu Byun and Safdar Jamil and Jungwook Han and Sungyong Park and Myungcheol Lee and Changsoo Kim and Beongjun Choi and Youngjae Kim", title = "An Analytical Model-based Capacity Planning Approach for Building {CSD}-based Storage Systems", journal = j-TECS, volume = "23", number = "6", pages = "92:1--92:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3623677", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:17 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3623677", abstract = "The data movement in large-scale computing facilities (from compute nodes to data nodes) is categorized as one of the major contributors to high cost and energy utilization. To tackle it, in-storage processing (ISP) within storage devices, such as Solid- \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "92", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Sun:2024:ACA, author = "Hui Sun and Bendong Lou and Chao Zhao and Deyan Kong and Chaowei Zhang and Jianzhong Huang and Yinliang Yue and Xiao Qin", title = "Asynchronous Compaction Acceleration Scheme for Near-data Processing-enabled {LSM}-tree-based {KV} Stores", journal = j-TECS, volume = "23", number = "6", pages = "93:1--93:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3626097", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:17 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3626097", abstract = "LSM-tree-based key-value stores (KV stores) convert random-write requests to sequence-write ones to achieve high I/O performance. Meanwhile, compaction operations in KV stores update SSTables in forms of reorganizing low-level data components to high- \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "93", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Bera:2024:SSP, author = "Pavia Bera and Stephen Cahoon and Sanjukta Bhanja and Alex Jones", title = "{SPIMulator}: a Spintronic Processing-in-memory Simulator for Racetracks", journal = j-TECS, volume = "23", number = "6", pages = "94:1--94:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3645112", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:17 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3645112", abstract = "In-memory processing is becoming a popular method to alleviate the memory bottleneck of the Von Neumann computing model. 
With the goal of improving both latency and energy cost associated with such in-memory processing, emerging non-volatile memory \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "94", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhou:2024:RRC, author = "Kunyu Zhou and Keni Qiu", title = "{REC}: {REtime} Convolutional Layers to Fully Exploit Harvested Energy for {ReRAM}-based {CNN} Accelerators", journal = j-TECS, volume = "23", number = "6", pages = "95:1--95:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3652593", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:17 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3652593", abstract = "As the Internet of Things (IoTs) increasingly combines AI technology, it is a trend to deploy neural network algorithms at edges and make IoT devices more intelligent than ever. Moreover, energy-harvesting technology-based IoT devices have shown the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "95", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Pandey:2024:NTM, author = "Shailja Pandey and Preeti Ranjan Panda", title = "{NeuroTAP}: Thermal and Memory Access Pattern-Aware Data Mapping on {$3$D} {DRAM} for Maximizing {DNN} Performance", journal = j-TECS, volume = "23", number = "6", pages = "96:1--96:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3677178", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:17 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3677178", abstract = "Deep neural networks (DNNs) have been widely adopted, owing to break-through performance and high accuracy. DNNs exhibit varying memory behavior involving specific and recognizable memory access patterns and access intensity, depending on the selected \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "96", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wu:2024:HPP, author = "Zhuanhao Wu and Anirudh Kaushik and Hiren Patel", title = "High Performance and Predictable Shared Last-level Cache for Safety-Critical Systems", journal = j-TECS, volume = "23", number = "6", pages = "97:1--97:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3687308", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:17 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3687308", abstract = "We propose ZeroCost-LLC (ZCLLC), a novel shared inclusive last-level cache (LLC) design for timing predictable multi-core platforms that offers lower worst-case latency (WCL) when compared with a traditional shared inclusive LLC design. ZCLLC achieves low \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "97", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Xu:2024:ODI, author = "Runqing Xu and Debiao He and Min Luo and Cong Peng and Xiangyong Zeng", title = "Optimizing {Dilithium} Implementation with {AVX2\slash-512}", journal = j-TECS, volume = "23", number = "6", pages = "98:1--98:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3687309", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:17 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3687309", abstract = "Dilithium is a signature scheme that is currently being standardized to the Module-Lattice-Based Digital Signature Standard by NIST. It is believed to be secure even against attacks from large-scale quantum computers based on lattice problems. 
The \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "98", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kashikar:2024:CWA, author = "Prachi Kashikar and Olivier Sentieys and Sharad Sinha", title = "Combining Weight Approximation, Sharing and Retraining for Neural Network Model Compression", journal = j-TECS, volume = "23", number = "6", pages = "99:1--99:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3687466", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:17 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3687466", abstract = "Neural network model compression is very important to achieve model deployment based on the memory and storage available in different computing systems. Generally, the continuous drive for higher accuracy in these models increases their size and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "99", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Weerasena:2024:RCA, author = "Hansika Weerasena and Prabhat Mishra", title = "Revealing {CNN} Architectures via Side-Channel Analysis in Dataflow-based Inference Accelerators", journal = j-TECS, volume = "23", number = "6", pages = "100:1--100:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3688001", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:17 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3688001", abstract = "Convolutional Neural Networks (CNNs) are widely used in various domains, including image recognition, medical diagnosis and autonomous driving. 
Recent advances in dataflow-based CNN accelerators have enabled CNN inference in resource-constrained edge \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "100", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mamish:2024:NSP, author = "John Mamish and Rawan Alharbi and Sougata Sen and Shashank Holla and Panchami Kamath and Yaman Sangar and Nabil Alshurafa and Josiah Hester", title = "{NIR-sighted}: a Programmable Streaming Architecture for Low-Energy Human-Centric Vision Applications", journal = j-TECS, volume = "23", number = "6", pages = "101:1--101:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3672076", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:17 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3672076", abstract = "Human studies often rely on wearable lifelogging cameras that capture videos of individuals and their surroundings to aid in visual confirmation or recollection of daily activities like eating, drinking, and smoking. However, this may include private or \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "101", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Jiang:2024:EMB, author = "Zijing Jiang and Qun Ding and An Wang", title = "Efficient Multi-Byte Power Analysis Architecture Focusing on Bitwise Linear Leakage", journal = j-TECS, volume = "23", number = "6", pages = "102:1--102:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3687484", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:17 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3687484", abstract = "As the most commonly used side-channel analysis method, Correlation Power Analysis (CPA) usually uses the divide-and-conquer strategy to guess the single-byte key in the scenario of block cipher parallel implementation. However, this method cannot \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "102", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Basak:2024:ELM, author = "Barnali Basak and Pallab Dasgupta and Arpan Pal", title = "Efficient Low-Memory Implementation of Sparse {CNNs} Using Encoded Partitioned Hybrid Sparse Format", journal = j-TECS, volume = "23", number = "6", pages = "103:1--103:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3687239", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:17 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3687239", abstract = "Certain data compression techniques like pruning leads to unstructured sparse Convolution Neural Network (CNN) models without directly leveraging sparsity in optimizing both memory consumption and inference latency of a model having low to medium \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "103", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kutukcu:2024:SAI, author = "Basar Kutukcu and Sabur Baidya and Sujit Dey", title = "{SLEXNet}: Adaptive Inference Using Slimmable Early Exit Neural Networks", journal = j-TECS, volume = "23", number = "6", pages = "104:1--104:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3689632", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Sep 25 11:16:17 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3689632", abstract = "Deep learning is a proven method in many applications. However, it requires high computation resources and usually has a constant architecture. 
Mobile systems are good candidates to benefit from deep learning applications since they are closely integrated \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "104", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Jiang:2025:EAI, author = "Junqiang Jiang and Shengjie Jin and Zhifang Sun and Jinxue Duan and Lizhi Liu and Li Pan and Zebo Peng", title = "An Efficient Approach for Improving Message Acceptance Rate and Link Utilization in Time-Sensitive Networking", journal = j-TECS, volume = "24", number = "1", pages = "1:1--1:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3690638", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 2 07:31:47 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3690638", abstract = "Time-sensitive networking (TSN) is an emerging technology widely used in real-time systems for its high bandwidth and deterministic timing properties. To ensure the deterministic transmission of Time-triggered (TT) messages, a guard band mechanism is \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "1", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Semenova:2025:CSS, author = "Sofiya Semenova and Steven Ko and Yu David Liu and Lukasz Ziarek and Karthik Dantu", title = "A Comprehensive Study of Systems Challenges in Visual Simultaneous Localization and Mapping Systems", journal = j-TECS, volume = "24", number = "1", pages = "2:1--2:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3677317", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 2 07:31:47 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3677317", abstract = "Visual SLAM systems are concurrent, performance-critical systems that respond to real-time environmental conditions and are frequently deployed on resource-constrained hardware. Previous work has identified three interconnected systems challenges to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "2", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Xia:2025:TBA, author = "Zhuoqun Xia and Ziyu Wang and Xiao Liu", title = "Trust Based Active Game Data Collection Scheme in Smart Cities", journal = j-TECS, volume = "24", number = "1", pages = "3:1--3:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3677319", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 2 07:31:47 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3677319", abstract = "The concept of a smart city is to equip sensors to various objects in urban life to monitor areas and collect sensing data, and make wise decisions based on the collected data. 
However, some malicious sensor devices may interrupt and interfere with data \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "3", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lin:2025:IRI, author = "Gao-Yu Lin and Po-Yuan Wang and Shin-Ming Cheng and Hahn-Ming Lee", title = "Improving Robustness in {IoT} Malware Detection through Execution Order Analysis", journal = j-TECS, volume = "24", number = "1", pages = "4:1--4:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3689427", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 2 07:31:47 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3689427", abstract = "The rapid expansion of the Internet of Things (IoT) has significantly increased the prevalence of malware targeting IoT devices. Although machine learning models offer promising solutions for automatic malware detection, they are increasingly vulnerable \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "4", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Garbay:2025:ZCD, author = "Thomas Garbay and Khalil Hachicha and Petr Dobias and Andrea Pinna and Karim Hocine and Wilfried Dron and Pedro Lusich and Imane Khalis and Bertrand Granado", title = "{ZIP-CNN}: Design Space Exploration for {CNN} Implementation within a {MCU}", journal = j-TECS, volume = "24", number = "1", pages = "5:1--5:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3691343", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 2 07:31:47 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3691343", abstract = "Embedded systems based on Microcontroller Units (MCUs) often gather significant quantities of data and solve various issues. Convolutional Neural Networks (CNNs) have proven their effectiveness in solving computer vision and natural language processing \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "5", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mohammad:2025:CAE, author = "Rafiuzzaman Mohammad and Sathish Gopalakrishnan and Karthik Pattabiraman", title = "{Co-Approximator}: Enabling Performance Prediction in Colocated Applications", journal = j-TECS, volume = "24", number = "1", pages = "6:1--6:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3677180", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 2 07:31:47 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3677180", abstract = "Today's Internet of Things (IoT) devices can colocate multiple applications on a platform with hardware resource sharing. 
Such colocations allow for increasing the throughput of contemporary IoT applications, similar to the use of multi-tenancy in clouds. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "6", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lam:2025:MMR, author = "Kam-Yiu Lam and Xiaofei Zhao and Chunjiang Zhu and Tei-Wei Kuo", title = "{MVLevelDB+}: Meeting Relative Consistency Requirements of Temporal Queries in Sensor Stream Databases", journal = j-TECS, volume = "24", number = "1", pages = "7:1--7:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3694787", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 2 07:31:47 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3694787", abstract = "Ensuring relative consistency in executing temporal queries to access real-time sensor data streams maintained in a database is a challenging problem, particularly when data transmission delays are lengthy and highly variable. Due to the unordered \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "7", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Fischer:2025:TAC, author = "Thilo Leon Fischer and Heiko Falk", title = "Towards Analysing Cache-Related Preemption Delay in Non-Inclusive Cache Hierarchies", journal = j-TECS, volume = "24", number = "1", pages = "8:1--8:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3695768", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 2 07:31:47 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3695768", abstract = "The impact of preemptions has to be considered when determining the schedulability of a task set in a preemptively scheduled system. In particular, the contents of caches can be disturbed by a preemption, thus creating context-switching costs. These \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "8", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Argotti:2025:OQM, author = "Yann Argotti and Yasmine Kenfaoui and Claude Baron and Alain Abran and Philippe Esteban", title = "An Operational Quality Model of Embedded Software Aligned with {ISO 25000}", journal = j-TECS, volume = "24", number = "1", pages = "9:1--9:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3691642", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 2 07:31:47 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3691642", abstract = "Embedded systems omnipresent in everyday life and industry are mainly composed of hardware and software that must comply with a number of standards and regulations. 
However, there is no consensus on the quality characteristics and subcharacteristics of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "9", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kundu:2025:SES, author = "Suparna Kundu and Quinten Norga and Angshuman Karmakar and Shreya Gangopadhyay and Jose Maria Bermudo Mera and Ingrid Verbauwhede", title = "{Scabbard}: an Exploratory Study on Hardware Aware Design Choices of Learning with Rounding-based Key Encapsulation Mechanisms", journal = j-TECS, volume = "24", number = "1", pages = "10:1--10:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3696208", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 2 07:31:47 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3696208", abstract = "Recently, the construction of cryptographic schemes based on hard lattice problems has gained immense popularity. Apart from being quantum resistant, lattice-based cryptography allows a wide range of variations in the underlying hard problem. As \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "10", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Goel:2025:EFE, author = "Shikha Goel and Rajesh Kedia and Rijurekha Sen and M. 
Balakrishnan", title = "{EXPRESS}: a Framework for Execution Time Prediction of Concurrent {CNNs} on {Xilinx} {DPU} Accelerator", journal = j-TECS, volume = "24", number = "1", pages = "11:1--11:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3697835", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 2 07:31:47 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3697835", abstract = "Deep learning Processor Unit (DPU) is a highly configurable CNN accelerator that supports a variety of CNNs and can be implemented with multiple instances on the same FPGA. Many applications deploy concurrent execution of different CNNs and in such a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "11", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Rathor:2025:AFE, author = "Mahendra Rathor", title = "{ALOHA-FP2I}: Efficient Algorithms and Hardware for Multi-Mode Rounding of Floating Point to Integer", journal = j-TECS, volume = "24", number = "1", pages = "12:1--12:26", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3701560", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 2 07:31:47 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3701560", abstract = "Modern technology is relying on hardware accelerators to achieve enhanced performance of computing systems. In the modern computing paradigm, floating point representation of numbers has gained popularity owing to its wide dynamic range. 
Rounding of floating point numbers to integer is used in modern processor architectures e.g., ARM and Intel's architecture (IA) as well as in specific applications such as multimedia. However, the academic literature lacks discussion on hardware designs for rounding binary floating point numbers to integer in different rounding modes. This article presents novel efficient algorithms and hardware architecture designs for rounding binary floating point numbers to the integer for the following rounding modes: round towards zero, round up (towards positive infinity), round down (towards negative infinity), round to the nearest integer, and round to nearest even. The article also proposes an integrated multi-mode rounding (IMR) algorithm and hardware design which can be configured to a specific rounding mode among the above-mentioned five modes. This article proposes a mantissa bit of rounding (MBR) to determine the condition of rounding for the various modes. The MBR is identified on the basis of the dynamic range and precision features of floating point representation. To the best of our knowledge, we present the individual as well as an integrated hardware design for the various rounding modes for the first time in the literature. The proposed designs have been implemented on an FPGA platform to analyze the design metrics such as area, delay, and power. The results imply that the proposed designs are suitable to aid the intended hardware accelerators as they are efficient in terms of the design parameters. Moreover, this article presents the integration of the proposed rounding hardware design with the compression processor and evaluates the integration overhead which is found to be nominal ($<$1\%).", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "12", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Tijero:2025:ALE, author = "Hector Perez Tijero and J.
Javier Guti{\'e}rrez Garc{\'\i}a and Diego Garc{\'\i}a Prieto", title = "Application-Level Evaluation of {IEEE 802.1AS} Synchronized Time and {Linux} for Distributed Real-Time Systems", journal = j-TECS, volume = "24", number = "1", pages = "13:1--13:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3701300", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 2 07:31:47 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/linux.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib; https://www.math.utah.edu/pub/tex/bib/unix.bib", URL = "https://dl.acm.org/doi/10.1145/3701300", abstract = "The use of Ethernet and Linux is becoming common in industrial applications, even for those with real-time requirements, although neither of them were originally designed for this purpose. The emergence of Industry 4.0 (also known as Industrial Internet 4.0) \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "13", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zyarah:2025:TSF, author = "Abdullah Zyarah and Dhireesha Kudithipudi", title = "Time-Series Forecasting and Sequence Learning Using Memristor-based Reservoir System", journal = j-TECS, volume = "24", number = "1", pages = "14:1--14:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3703446", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 2 07:31:47 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3703446", abstract = "Pushing the frontiers of time-series information processing in the ever-growing domain of edge devices with stringent resources has been impeded by the systems' ability to process information and learn locally on the device. 
Local processing and learning \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "14", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Sahin:2025:HAE, author = "Suhap Sahin and Oguz Narli and Muhammet Bahadir T{\"u}rkoglu and Hikmetcan {\"O}zcan", title = "Hardware Area Efficient and Real-Time {FPGA} Implementation of {PHMMRGB}", journal = j-TECS, volume = "24", number = "1", pages = "15:1--15:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3701727", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 2 07:31:47 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3701727", abstract = "For encryption applications on embedded systems, operating in real-time while using minimal system resources is essential. It is expected that efficient and rapid encryption of high-resolution images is to be accomplished with limited hardware resources. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "15", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{He:2025:HHT, author = "Pengzhou He and Yazheng Tu and Tianyou Bao and {\c{C}}etin Kaya Ko{\c{c}} and Jiafeng Xie", title = "{HSPA}: High-Throughput Sparse Polynomial Multiplication for Code-based Post-Quantum Cryptography", journal = j-TECS, volume = "24", number = "1", pages = "16:1--16:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3703837", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 2 07:31:47 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3703837", abstract = "Increasing attention has been paid to code-based post-quantum cryptography (PQC) schemes, e.g., HQC (Hamming Quasi-Cyclic) and BIKE (Bit Flipping Key Encapsulation), since they've been selected as the fourth-round National Institute of Standards and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "16", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Fahmida:2025:RRT, author = "Sezana Fahmida and Aakriti Jain and Venkata Prashant Modekurthy and Dali Ismail and Abusayeed Saifullah", title = "{RTPL}: a Real-Time Communication Protocol for {LoRa} Network", journal = j-TECS, volume = "24", number = "1", pages = "17:1--17:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3702209", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 2 07:31:47 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3702209", abstract = "The industrial Internet of Things (IIoT) is prominently emerging in applications of large-scale and wide-area applications, such as oilfield management, smart grid management, real-time equipment monitoring, and integration of traffic management systems \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "17", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chang:2025:CGT, author = "Liangliang Chang and Serhan Gener and Joshua Mack and Hasan Umut Suluhan and Ali Akoglu and Chaitali Chakrabarti", title = "Coarse-Grained Task Parallelization by Dynamic Profiling for Heterogeneous {SoC}-Based Embedded System", journal = j-TECS, volume = "24", number = "1", pages = "18:1--18:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3704635", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 2 07:31:47 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3704635", abstract = "In this study, we introduce a methodology for automatically transforming user applications written in C/C++ to a parallel representation consisting of coarse-grained tasks based on dynamic profiling. Such a parallel representation is suitable for mapping \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "18", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ha:2025:SOD, author = "Soonhoi Ha and Eunjin Jeong", title = "Software Optimization and Design Methodology for Low Power Computer Vision Systems", journal = j-TECS, volume = "24", number = "1", pages = "19:1--19:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3687310", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 2 07:31:47 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3687310", abstract = "This tutorial article addresses a low power computer vision system as an example of a growing application domain of neural networks, exploring various technologies developed to enhance accuracy within the resource and performance constraints imposed by \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "19", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mack:2025:TNR, author = "Joshua Mack and Anish Krishnakumar and Umit Ogras and Ali Akoglu", title = "Tutorial: a Novel Runtime Environment for Accelerator-Rich Heterogeneous Architectures", journal = j-TECS, volume = "24", number = "1", pages = "20:1--20:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3687463", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 2 07:31:47 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3687463", abstract = "As the landscape of computing advances, system designers are increasingly exploring methodologies that leverage higher levels of heterogeneity to enhance performance within constrained size, weight, power, and cost parameters. 
CEDR (Compiler-integrated \ldots{}).", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "20", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Luo:2025:EDL, author = "Xiangzhong Luo and Di Liu and Hao Kong and Shuo Huai and Hui Chen and Guochu Xiong and Weichen Liu", title = "Efficient Deep Learning Infrastructures for Embedded Computing Systems: a Comprehensive Survey and Future Envision", journal = j-TECS, volume = "24", number = "1", pages = "21:1--21:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3701728", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 2 07:31:47 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3701728", abstract = "Deep neural networks (DNNs) have recently achieved impressive success across a wide range of real-world vision and language processing tasks, spanning from image classification to many other downstream vision tasks, such as object detection, tracking, and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "21", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Gunzel:2025:EEL, author = "Mario G{\"u}nzel and Harun Teper and Georg von der Br{\"u}ggen and Jian-Jia Chen", title = "End-To-End Latency of Cause--Effect Chains: a Tutorial", journal = j-TECS, volume = "24", number = "1", pages = "22:1--22:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3703630", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Jan 2 07:31:47 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", URL = "https://dl.acm.org/doi/10.1145/3703630", abstract = "In many applications of cyber-physical systems, a sequence of tasks is necessary to perform a certain functionality. For example, from a sensor to an actuator, the first task reads the sensor value (cause), the second task processes the data, and the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "22", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hsu:2025:SME, author = "Chen-Fong Hsu and Hong-Sheng Zheng and Yu-Yuan Liu and Tsung Tai Yeh", title = "{StreamNet++}: Memory-Efficient Streaming {TinyML} Model Compilation on Microcontrollers", journal = j-TECS, volume = "24", number = "2", pages = "23:1--23:??", month = mar, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3706107", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri May 16 07:02:36 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The rapid growth of on-device artificial intelligence increases the importance of TinyML inference applications. 
However, the stringent tiny memory space on the microcontroller unit (MCU) raises the grand challenge when deploying deep neural network (DNN) \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "23", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhou:2025:SST, author = "Shihang Zhou and Alejandra C. Hernandez and Clara Gomez and Wenjie Yin and M{\aa}rten Bj{\"o}rkman", title = "{SmartTBD}: Smart Tracking for Resource-constrained Object Detection", journal = j-TECS, volume = "24", number = "2", pages = "24:1--24:??", month = mar, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3703912", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri May 16 07:02:36 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "With the growing demand for video analysis on mobile devices, object tracking has demonstrated to be a suitable assistance to object detection under the Tracking-By-Detection (TBD) paradigm for reducing computational overhead and power demands. However, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "24", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ni:2025:HHE, author = "Ziying Ni and Ayesha Khalid and Weiqiang Liu and M{\'a}ire O'Neill", title = "A Highly Hardware Efficient {ML-KEM} Accelerator with Optimised Architectural Layers", journal = j-TECS, volume = "24", number = "2", pages = "25:1--25:??", month = mar, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3708469", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri May 16 07:02:36 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The Module-Lattice-Based Key encapsulation Mechanism (ML-KEM) scheme, which is currently being standardised, is a quantum attack resistant KEM that is based on CRYSTALS-Kyber. CRYSTALS-Kyber is the only Public-key Encryption (PKE)/ KEM scheme selected in \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "25", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Shi:2025:CFI, author = "Zhengyuan Shi and Cheng Chen and Gangqiang Yang and Hongchao Zhou and Hailiang Xiong and Zhiguo Wan", title = "Customized {FPGA} Implementation of Authenticated Lightweight Cipher Fountain for {IoT} Systems", journal = j-TECS, volume = "24", number = "2", pages = "26:1--26:??", month = mar, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3643039", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri May 16 07:02:36 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Authenticated Encryption with Associated-Data (AEAD) can ensure both confidentiality and integrity of information in encrypted communication. Distinctive variants are customized from AEAD to satisfy various requirements. 
In this paper, we take a 128-bit \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "26", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hutto:2025:IPH, author = "Kevin Hutto and Vincent Mooney", title = "Implementing Privacy Homomorphism with Random Encoding and Computation Controlled by a Remote Secure Server", journal = j-TECS, volume = "24", number = "2", pages = "27:1--27:??", month = mar, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3651617", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri May 16 07:02:36 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Remote IoT devices face significant security risks due to their inherent physical vulnerability. An adversarial actor with sufficient capability can monitor the devices or exfiltrate data to access sensitive information. Remotely deployed devices such as \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "27", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Achar:2025:LHF, author = "Sagar Dev Achar and Thejaswini P. and Sukumar Nandi and Sunit Nandi", title = "{LiteHash}: Hash Functions for Resource-Constrained Hardware", journal = j-TECS, volume = "24", number = "2", pages = "28:1--28:??", month = mar, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3677181", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri May 16 07:02:36 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/hash.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The global paradigm shift toward edge computing has led to a growing demand for efficient integrity verification. 
Hash functions are one-way algorithms which act as a zero-knowledge proof of a datum's contents. However, it is infeasible to compute hashes on devices with limited processing power and memory. Hence, we propose four novel LiteHash functions which are architecturally similar to SHA-512 yet simpler. By using various approximation techniques, our implementations reduce the computational costs of digesting a message into a hash. On validating our proposed designs using the NIST PRNG Test Suite, we observe SHA-512 equivalent cryptographic security while satisfying all desired hash function property requirements. We observe a minimum of 9.41\% reduction in area, 20.47\% reduction in power, and 22.05\% increase in throughput. Our designs offer a throughput of up to 2 Gbps while reducing area and power by a maximum of 16.86\% and 32.48\%, respectively. LiteHash functions also support the computation of the entire SHA-2 family of hash functions (SHA-224/256/384/512) with minor architectural modifications.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "28", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hsu:2025:ATA, author = "Shih-Wen Hsu and Yen-Ting Chen and Kam-Yiu Lam and Yuan-Hao Chang and Wei-Kuan Shih and Han-Chieh Chao", title = "{APB-tree}: an Adaptive Pre-built Tree Indexing Scheme for {NVM}-based {IoT} Systems", journal = j-TECS, volume = "24", number = "2", pages = "29:1--29:??", month = mar, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3677179", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri May 16 07:02:36 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "With the proliferation of sensors and the emergence of novel applications, IoT data has grown exponentially in recent years. 
Given this trend, efficient data management is crucial for a system to easily access vast amounts of information. For decades, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "29", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kwon:2025:SUH, author = "Jungyoon Kwon and Hyemi Min and Bernhard Egger", title = "{SENNA}: Unified {Hardware\slash Software} Space Exploration for Parametrizable Neural Network Accelerators", journal = j-TECS, volume = "24", number = "2", pages = "30:1--30:??", month = mar, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3705731", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri May 16 07:02:36 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Parametrizable neural network accelerators enable the deployment of targeted hardware for specialized environments. Finding the best architecture configuration for a given specification, however, is challenging. A large number of hardware configurations \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "30", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ratti:2025:HCF, author = "Francesco Ratti and Johannes Kn{\"o}dtel and Marc Reichenbach", title = "{HeterogeneousRTOS}: a {CPU-FPGA} Real-Time {OS} for Fault Tolerance on {COTS} at Near-Zero Timing Cost", journal = j-TECS, volume = "24", number = "2", pages = "31:1--31:??", month = mar, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3712062", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri May 16 07:02:36 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Ionizing particles in the atmosphere may strike circuits causing Single Event Upsets (SEU), affecting the output correctness. Critical real-time systems are traditionally custom-designed, featuring redundancy for guaranteeing fault resilience. The \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "31", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Aghapour:2025:PDD, author = "Saeed Aghapour and Kasra Ahmadi and Mila Anastasova and Reza Azarderakhsh and Mehran Mozaffari Kermani", title = "{PUF-Dilithium}: Design of a {PUF}-Based {Dilithium} Architecture Benchmarked on {ARM} Processors", journal = j-TECS, volume = "24", number = "2", pages = "32:1--32:??", month = mar, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3715328", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri May 16 07:02:36 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Addressing the looming threat posed by quantum computers capable of breaching current public key cryptography schemes has become imperative. 
To this end, the National Institute of Standards and Technology (NIST) initiated a competition in Post-Quantum Cryptography, resulting in the selection of four schemes as the new standardized replacements, while a fourth round and an additional signature round is still ongoing. Notably, CRYSTALS-Dilithium, a lattice-based signature scheme, has exhibited promising resilience due to its efficiency and simplicity. Despite the finalization of standardization for these new four schemes, transitioning from classical cryptography to these alternatives necessitates further investigation and analysis. Comprehensive scrutiny of these newly standardized schemes is imperative, including considerations of implementation efficiency across various platforms and side-channel vulnerability analysis. This article introduces a novel design leveraging physical unclonable functions to bolster the physical security of CRYSTALS-Dilithium. Physical security is paramount in scenarios where network nodes are exposed to public scrutiny, potentially making them targets for adversaries. After discussing the advantages of our design compared to the original design, we implemented it on two different architectures, ARMv7 and ARMv8. Our results indicate substantial improvements in both security and performance compared to existing references. Moreover, noting the new competition initiated by the NIST in 2023 for new signatures (first round finalized in October 2024), potentially the proposed schemes can be adopted to the new standards set to be finalized in the coming years. These make our scheme not solely confined to the current standards and would be an important merit of the presented approaches.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "32", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Dobkin:2025:RFR, author = "Daniel Dobkin and Nimrod Cever and Itamar Levi", title = "{RAD-FS}: Remote Timing and Power {SCA} Security in {DVFS}-augmented Ultra-Low-Power Embedded Systems", journal = j-TECS, volume = "24", number = "2", pages = "33:1--33:??", month = mar, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3711836", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri May 16 07:02:36 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "High-performance crypto-engines have become crucial components in modern System-On-Chip (SoC) architectures across platforms, from servers to edge-IoTs'. Alas, their secure operation faces a significant obstacle caused by information-leakage accessed \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "33", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Park:2025:CPS, author = "Jong-Yeon Park and Seonggyeom Kim and Wonil Lee and Bo Gyeong Kang and Il-Jong Song and Jaekeun Oh and Kouichi Sakurai", title = "A Compact and Parallel Swap-Based Shuffler Based on Butterfly Network and Its Complexity Against Side Channel Analysis", journal = j-TECS, volume = "24", number = "2", pages = "34:1--34:??", month = mar, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3715961", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri May 16 07:02:36 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "A prominent countermeasure against side-channel attacks, the hiding countermeasure, typically involves shuffling operations using a permutation algorithm. 
This is especially crucial in the era of Post-quantum Cryptography, where computational \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "34", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Liu:2025:HTS, author = "Yang Liu and Zihan Wang and Mengchi Cai and Qing Xu and Keqiang Li", title = "A Hybrid Target Selection Model of Functional Safety Compliance for Autonomous Driving System", journal = j-TECS, volume = "24", number = "2", pages = "35:1--35:??", month = mar, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3716631", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri May 16 07:02:36 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The autonomous driving system faces challenges in selecting critical targets under dense environments with limited computation resources. Existing rule-based methods struggle with complex scenarios, while learning-based approaches lack interpretability \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "35", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lubeck:2025:AGF, author = "Konstantin L{\"u}beck and Alexander Louis-Ferdinand Jung and Felix Wedlich and Mika Markus M{\"u}ller and Federico Nicol{\'a}s Peccia and Felix Th{\"o}mmes and Jannik Steinmetz and Valentin Biermaier and Adrian Frischknecht and Paul Palomero Bernardo and Oliver Bringmann", title = "Automatic Generation of Fast and Accurate Performance Models for Deep Neural Network Accelerators", journal = j-TECS, volume = "24", number = "2", pages = "36:1--36:??", month = mar, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3715122", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri May 16 07:02:36 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Implementing Deep Neural Networks (DNNs) on resource-constrained edge devices is a challenging task that requires tailored hardware accelerator architectures and a clear understanding of their performance characteristics when executing the intended AI \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "36", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Yu:2025:FMH, author = "Wangyang Yu and Qi Guo and Yumeng Cheng and Lu Liu and Fei Hao and Xiaojun Zhai and Minsi Chen", title = "Formal Modeling of Hybrid System Based on Semi-continuous Colored {Petri} Net: a Case Study of Adaptive Cruise Control System", journal = j-TECS, volume = "24", number = "3", pages = "37:1--37:??", month = may, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3715960", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri May 16 07:02:37 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Many Next-Generation consumer electronic devices would be distributed hybrid electronic systems, such as UAVs (Unmanned Aerial Vehicles) and smart electronic cars. The safety and risk control are the key issues for the sustainability of such consumer \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "37", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chang:2025:MEL, author = "Jingjing Chang and Peining Zhen and Xiaotao Yan and Yixin Yang and Ziyang Gao and Haibao Chen", title = "{MemATr}: an Efficient and Lightweight Memory-augmented Transformer for Video Anomaly Detection", journal = j-TECS, volume = "24", number = "3", pages = "38:1--38:??", month = may, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3719203", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri May 16 07:02:37 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Anomaly detection in videos is a long-standing and challenging problem. 
Previous methods often adopt deep and large neural networks to achieve the best detection accuracy; however, the high computational costs prevent them from being used in real-world \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "38", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Taufique:2025:EAR, author = "Zain Taufique and Anil Kanduri and Antonio Miele and Amir Rahmani and Cristiana Bolchini and Nikil Dutt and Pasi Liljeberg", title = "Exploiting Approximation for Run-time Resource Management of Embedded {HMPs}", journal = j-TECS, volume = "24", number = "3", pages = "39:1--39:??", month = may, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3723357", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri May 16 07:02:37 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Run-time resource management (RTM) of multi-programmed workloads on heterogeneous multi-core platforms is challenging due to (i) fixed power budget of the device, (ii) variable performance requirements of the workloads, and (iii) unknown arrival of the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "39", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chang:2025:FTF, author = "Qiong Chang and Xinyuan Chen and Xiang Li and Weimin Wang and Jun Miyazaki", title = "Faster than Fast: Accelerating Oriented {FAST} Feature Detection on Low-end Embedded {GPUs}", journal = j-TECS, volume = "24", number = "3", pages = "40:1--40:??", month = may, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3725217", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri May 16 07:02:37 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The visual-based SLAM (Simultaneous Localization and Mapping) is a technology widely used in applications such as robotic navigation and virtual reality, which primarily focuses on detecting feature points from visual images to construct an unknown \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "40", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ramezani:2025:FCP, author = "Zahra Ramezani and Kenan Sehi{\'c} and Luigi Nardi and Knut {\AA}kesson", title = "Falsification of Cyber-physical Systems Using {Bayesian} Optimization", journal = j-TECS, volume = "24", number = "3", pages = "41:1--41:??", month = may, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3711922", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri May 16 07:02:37 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Cyber-physical systems (CPSs) are often complex and safety-critical, making it both challenging and crucial to ensure that the system's specifications are met. 
Simulation-based falsification is a practical testing technique for increasing confidence in a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "41", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Tao:2025:WPS, author = "Zhiyong Tao and Zhelun Wang and Ying Liu and Yuqing He and Yikai Wang", title = "Wireless Perceptual Space Modeling Method for Cross-Domain Human Activity Recognition", journal = j-TECS, volume = "24", number = "3", pages = "42:1--42:??", month = may, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3724119", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri May 16 07:02:37 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Existing Wi-Fi perceptual recognition using Doppler Frequency Shift (DFS) can portray human activity and behavioral features, but the method is affected by the user's movement direction, position, and other factors, resulting in large differences in the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "42", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kim:2025:ERE, author = "Bokyung Kim and Shiyu Li and Brady Taylor and Yiran Chen", title = "Efficient and Robust Edge {AI}: Software, Hardware, and the Co-design", journal = j-TECS, volume = "24", number = "3", pages = "43:1--43:??", month = may, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3724396", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri May 16 07:02:37 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Artificial intelligence (AI) provides versatile capabilities in applications such as image classification and voice recognition that are most useful in edge or mobile computing settings. Shrinking these sophisticated algorithms into small form factors \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "43", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Brandt:2025:ISI, author = "Jens Brandt and Indranil Saha and Lijun Zhang", title = "Introduction to the Special Issue on Formal Methods and Models for System Design", journal = j-TECS, volume = "24", number = "3", pages = "44:1--44:??", month = may, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3722218", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri May 16 07:02:37 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "44", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Honorat:2025:RTF, author = "Alexandre Honorat and Hai Nam Tran and Thierry Gautier and Lo{\"\i}c Besnard and Shuvra Bhattacharyya and Jean-Pierre Talpin", title = "Real-time Fixed Priority Scheduling Synthesis Using Affine {DataFlow} Graphs: from Theory to Practice", journal = j-TECS, volume = "24", number = "3", pages = "45:1--45:??", month = may, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3615586", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri May 16 07:02:37 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The major drawback of using static schedules to execute dataflow applications is their high inflexibility. In real-time systems, periodic schedules make it easier to assert safety guarantees and to decrease the schedule size, but their characteristics \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "45", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Sultan:2025:AML, author = "Bastien Sultan and L{\'e}on Fr{\'e}not and Ludovic Apvrille and Philippe Jaillon and Sophie Coudert", title = "{AMULET}: a Mutation Language Enabling Automatic Enrichment of {SysML} Models", journal = j-TECS, volume = "24", number = "3", pages = "46:1--46:??", month = may, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3624583", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri May 16 07:02:37 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "SysML models are widely used for designing and analyzing complex systems. 
Model-based design methods often require successive modifications of the models, whether for incrementally refining the design (e.g., in agile development methods) or for testing \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "46", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Choi:2025:RAS, author = "Sung Woo Choi and Mykhailo Ivashchenko and Luan Nguyen and Dung Tran", title = "Reachability Analysis of Sigmoidal Neural Networks", journal = j-TECS, volume = "24", number = "3", pages = "47:1--47:??", month = may, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3627991", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri May 16 07:02:37 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article extends the star set reachability approach to verify the robustness of feed-forward neural networks (FNNs) with sigmoidal activation functions such as Sigmoid and TanH. The main drawbacks of the star set approach in Sigmoid/TanH FNN \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "47", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Das:2025:DFS, author = "Anup Das", title = "Design Flow for Scheduling Spiking Deep Convolutional Neural Networks on Heterogeneous Neuromorphic System-on-chip", journal = j-TECS, volume = "24", number = "3", pages = "48:1--48:??", month = may, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3635032", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri May 16 07:02:37 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Neuromorphic systems-on-chip (NSoCs) integrate CPU cores and neuromorphic hardware accelerators on the same chip. 
These platforms can execute spiking deep convolutional neural networks (SDCNNs) with a low energy footprint. Modern NSoCs are heterogeneous \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "48", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kumar:2025:IFA, author = "Navin Kumar and Sandeep K. Sood and Munish Saini", title = "{IoV}-Fog-Assisted Framework for Accident Detection and Classification", journal = j-TECS, volume = "24", number = "3", pages = "49:1--49:??", month = may, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3633805", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri May 16 07:02:37 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The evolution of vehicular research into an effectuating area like the Internet of Vehicles (IoV) was verified by technical developments in hardware. The integration of the Internet of Things (IoT) and Vehicular Ad-hoc Networks (VANET) has significantly \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "49", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Panda:2025:SPU, author = "Abhinandan Panda and Srinivas Pinisetty and Partha Roop", title = "Securing Pacemakers Using Runtime Monitors over Physiological Signals", journal = j-TECS, volume = "24", number = "3", pages = "50:1--50:??", month = may, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3638286", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri May 16 07:02:37 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Wearable and implantable medical devices (IMDs) are increasingly deployed to diagnose, monitor, and provide therapy for critical medical conditions. 
Such medical devices are safety-critical cyber-physical systems (CPSs). These systems support wireless \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "50", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Valente:2025:MBT, author = "Hugo Valente and Miguel de Miguel and {\'A}ngel P{\'e}rez-Mu{\~n}oz and Alejandro Alonso and Juan Zamorano and Juan {De La Puente}", title = "Model-based Toolchain for Core Flight System ({cFS}) Embedded Systems", journal = j-TECS, volume = "24", number = "3", pages = "51:1--51:??", month = may, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3706587", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri May 16 07:02:37 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The space domain is experiencing a paradigm shift with the rise of micro- and nanosatellites. Historically, launching a satellite required a big financial risk only sustained by governments or big companies. Nowadays, with the miniaturization of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "51", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hu:2025:EFB, author = "Ruiqi Hu and Kairong Liu and Zhikun She", title = "Evolution Function Based Reach-Avoid Verification for Time-varying Systems with Disturbances", journal = j-TECS, volume = "24", number = "3", pages = "52:1--52:??", month = may, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3626099", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri May 16 07:02:37 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this work, we investigate the reach-avoid problem of a class of time-varying analytic systems with disturbances described by uncertain parameters. Firstly, by proposing the concepts of maximal and minimal reachable sets, we connect the avoidability and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "52", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhang:2025:EEP, author = "Yi-Wen Zhang and Rong-Kun Chen", title = "Energy-Efficient {Partitioned-RM} Scheduling for Shared Resources Imprecise Mixed-Criticality Tasks", journal = j-TECS, volume = "24", number = "4", pages = "53:1--53:30", month = jul, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3728641", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Sep 23 06:45:59 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Shared resources and energy consumption are important factors to consider in the design of mixed-criticality systems. Existing works have studied these two factors separately. In this article, we simultaneously focus on shared resources and energy \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "53", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Li:2025:DIA, author = "Chunlin Li and Sen Liu and Kun Jiang and Mengjie Yang and Zihao Zhang and Bingxin Wang and Liang Zhao and Chen Chen and Shaohua Wan", title = "{DNN} Inference Acceleration Based on Adaptive Task Partitioning and Offloading in Embedded {VEC}", journal = j-TECS, volume = "24", number = "4", pages = "54:1--54:35", month = jul, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3725734", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Sep 23 06:45:59 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "As a distributed embedded system, vehicular edge computing (VEC) completes various complex Deep neural network (DNN) tasks through network collaboration and communication. However, due to the limited computing power of vehicle processors, vehicles cannot \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "54", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhao:2025:WBA, author = "Zihao Zhao and Yanhong Wang and Xu Jin and Haotian Zheng and Maohua Nie and Longfei Gou and Junmin He and Yongchuan Dong and Qiaosha Zou and Yiyun Zhang and C.-J.
Richard Shi", title = "A Workload-Balance-Aware Accelerator Enabling Dense-to-Arbitrary-Sparse Neural Networks", journal = j-TECS, volume = "24", number = "4", pages = "55:1--55:25", month = jul, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3725532", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Sep 23 06:45:59 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Deep neural networks (DNNs) have proved their great potential over various perceptual and cognitive tasks with the cost of ever-growing storage capacity and computation complexity. Sparse representations in neural networks have emerged as a compelling \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "55", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Jiang:2025:TLG, author = "Renshuang Jiang and Pan Dong and Yan Ding and Ran Wei and Zhe Jiang", title = "Thetis-lathe: Guidance on Reducing Residual Safety Obstacle in System Software from {Rust} Source Codes", journal = j-TECS, volume = "24", number = "4", pages = "56:1--56:25", month = jul, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3736729", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Sep 23 06:45:59 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/rust.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Programming languages play a crucial role in ensuring the safety of the Operating System (OS). 
Traditional low-level languages (e.g., C, C++), while high-performance, usually offer very limited protections on safety, and their vulnerability patches (e.g., AddressSanitizer, DangSan), while effective in mitigating some issues, are often too expensive. Rust language combines memory safety with performance, providing a fresh paradigm for constructing efficient, reliable, and dependable. However, existing Rust rely on unsafe code fragments to interface with low-level hardware and other programming languages, introducing critical issues: (1) compromised system-wide safety due to the presence of unsafe code, (2) inaccurate defect detection because of unavoidable interactions between unsafe and safe code; and (3) difficulty in finding an optimal balance between accuracy and efficiency of defect detection and elimination.\par In contrast to the previous work, we believe --- ``prevention is always better than cure.'' Therefore, we propose a new methodology (namely Thetis) to detect and guide the minimization of unsafe fragments in Rust source code. For unsafe code detection, Thetis designs an automated inspection method based on feature extraction. For unsafe code elimination based on Unsafe Rust types and interchangeability, Thetis prop defect optimization suggestions and designs a framework to automatically provide safer code recommendations. We have designed and implemented a new tool called Thetis-lathe based on Thetis and have also ported Thetis-lathe to three mainstream Rust applications, i.e., BlogOS, rCore, and Miri Failure Set. Evaluations show that our tool improved the accuracy of defects and decreased the amount of unsafe code by 35\% and undefined behavior by approximately 50\%.
Furthermore, Thetis-lathe speeds up the run-time about $ 5 \times $ compared with the sanitizer and LMbench results indicate that our approach introduces 7.6\% (average) performance overhead on the entire system.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "56", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Pathak:2025:TAR, author = "Karan Pathak and Joshua Klein and Giovanni Ansaloni and Said Hamdioui and Georgi Gaydadjiev and Marina Zapater and David Atienza", title = "Towards Accurate {RISC-V} Full System Simulation via Component-Level Calibration", journal = j-TECS, volume = "24", number = "4", pages = "57:1--57:19", month = jul, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3737876", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Sep 23 06:45:59 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/risc-v.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Full-System (FS) simulation is essential for performance evaluation of complete systems that execute complex applications on a complete software stack consisting of an operating system and user applications. Nevertheless, they require careful fine-tuning against real hardware to obtain reliable performance statistics, which can become tedious, error-prone, and time-consuming with typical trial-and-error approaches. We propose a novel, streamlined, component-level calibration methodology to address these shortcomings to validate FS simulation models. Our methodology greatly accelerates the validation process without sacrificing accuracy. It is Instruction Set Architecture (ISA)-agnostic, and can tackle hardware specifications at different levels of detail. 
We demonstrate its effectiveness by validating FS models against both open-hardware and IP-protected (closed hardware) RISC-V silicon, achieving a mean error of 19\%--23\% for the SPEC CPU2017 suite in the two cases. We introduce the first open-source RISC-V-based FS-validated simulation models with a complete and replicable methodology.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "57", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Angioli:2025:EIL, author = "Marco Angioli and Marcello Barbirotta and Abdallah Cheikh and Antonio Mastrandrea and Francesco Menichelli and Mauro Olivieri", title = "Efficient Implementation of {LinearUCB} through Algorithmic Improvements and Vector Computing Acceleration for Embedded Learning Systems", journal = j-TECS, volume = "24", number = "4", pages = "58:1--58:23", month = jul, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3736226", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Sep 23 06:45:59 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "As the Internet of Things expands, embedding Artificial Intelligence algorithms in resource-constrained devices has become increasingly important to enable real-time, autonomous decision-making without relying on centralized cloud servers. However, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "58", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Shao:2025:NLB, author = "Cuiping Shao and Wenzhe Li and Huiyun Li and Zhimin Tang and Jianing Liang", title = "A Novel Lattice-Based Fault Injection Attack Targeting the Nonce in the {SM2} Digital Signature Algorithm", journal = j-TECS, volume = "24", number = "4", pages = "59:1--59:21", month = jul, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3744246", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Sep 23 06:45:59 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In embedded systems, particularly resource-constrained Internet of Things (IoT) devices, the SM2 Digital Signature Algorithm (SM2-DSA) standard is widely deployed for cryptographic security. While fault injection attacks can compromise digital signatures \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "59", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Sole:2025:MDA, author = "Joan Miquel Sol{\'e} and Roger Pueyo Centelles and Felix Freitag and Roc Meseguer and Roger Baig", title = "Middleware for Distributed Applications in a {LoRa} Mesh Network", journal = j-TECS, volume = "24", number = "4", pages = "60:1--60:26", month = jul, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3747295", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Sep 23 06:45:59 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Recently, LoRa mesh networks have gained increasing interest as a communication layer for sending data between IoT nodes. 
However, the network service of the firmware on the microcontroller-based nodes is typically limited to sending and receiving LoRa \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "60", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Liu:2025:WGI, author = "Ying Liu and Zhiyang Cao and Jiaqi Cai and Yuqing He and Mingzhe Hu", title = "{Wi-GPD} Identification System Based on Gait Point Density", journal = j-TECS, volume = "24", number = "4", pages = "61:1--61:24", month = jul, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3746639", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Sep 23 06:45:59 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "A significant challenge currently facing Wi-Fi-based gait recognition technology is that changes in walking paths in a multipath environment can significantly interfere with the CSI gait signal collected via Wi-Fi, which greatly hinders the application of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "61", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wang:2025:AMB, author = "Qingbin Wang and Yuchen Pei and Wai Chon Wong and Xuefeng Mu and Yan Zhang and Yutao Ma", title = "Activation Map-based Knowledge Distillation for Real-time Cervical {OCT} Image Classification", journal = j-TECS, volume = "24", number = "4", pages = "62:1--62:27", month = jul, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3746229", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Sep 23 06:45:59 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Cervical cancer is a significant global health concern for women. 
Optical coherence tomography (OCT) offers a non-invasive, high-resolution imaging method for cervical examinations. The clinical need for real-time AI-aided diagnosis in low-resource \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "62", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Gonidec:2025:DTP, author = "Gwenn {Le Gonidec} and Guillaume Bouffard and Jean-Christophe Prevotet and Maria M{\'e}ndez Real", title = "Do Not Trust Power Management: a Survey on Internal Energy-based Attacks Circumventing Trusted Execution Environments Security Properties", journal = j-TECS, volume = "24", number = "4", pages = "63:1--63:35", month = jul, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3735556", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Sep 23 06:45:59 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Over the past few years, several research groups have introduced innovative hardware designs for Trusted Execution Environments (TEEs), aiming to secure applications against potentially compromised privileged software, including the kernel [10, 63]. Since \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "63", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Tempelmeier:2025:SIO, author = "Michael Tempelmeier and Fabrizio {De Santis} and Shivam Bhasin and Stefan Mangard", title = "Special Issue on Open Hardware for Embedded System Security and Cryptography", journal = j-TECS, volume = "24", number = "5", pages = "64:1--64:3", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3747326", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 2 11:05:37 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "64", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mohajerani:2025:LCW, author = "Kamyar Mohajerani and Luke Beckwith and Abubakr Abdulgadir and Jens-Peter Kaps and Kris Gaj", title = "Lightweight Champions of the World: Side-Channel Resistant Open Hardware for Finalists in the {NIST Lightweight Cryptography} Standardization Process", journal = j-TECS, volume = "24", number = "5", pages = "65:1--65:25", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3677320", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 2 11:05:37 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Cryptographic competitions have played a significant role in stimulating the development and release of open hardware for cryptography. The primary reason was the focus of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "65", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Karl:2025:PCC, author = "Patrick Karl and Jonas Schupp and Georg Sigl", title = "Performance and Communication Cost of Hardware Accelerators for Hashing in Post-Quantum Cryptography", journal = j-TECS, volume = "24", number = "5", pages = "66:1--66:31", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3676965", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 2 11:05:37 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "SPHINCS+ is a signature scheme included in the first NIST post-quantum standard that bases its security on the underlying hash primitive. As most of the runtime of SPHINCS+ is \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "66", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ciani:2025:UOP, author = "Maicol Ciani and Emanuele Parisi and Alberto Musa and Francesco Barchi and Andrea Bartolini and Ari Kulmala and Rafail Psiakis and Angelo Garofalo and Andrea Acquaviva and Davide Rossi", title = "Unleashing {OpenTitan}'s Potential: a Silicon-Ready Embedded Secure Element for Root of Trust and Cryptographic Offloading", journal = j-TECS, volume = "24", number = "5", pages = "67:1--67:29", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3690823", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 2 11:05:37 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The rapid advancement and exploration of open-hardware RISC-V platforms are catalyzing substantial changes across critical sectors, including autonomous vehicles, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "67", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kostalabros:2025:LHD, author = "Ioannis-Vatistas Kostalabros and Jordi Ribes and Xavier Carril and Oriol Farras and Carles Hernandez and Miquel Moreto", title = "Leveraging {HLS} to Design a Versatile \& High-Performance Classic {McEliece} Accelerator", journal = j-TECS, volume = "24", number = "5", pages = "68:1--68:27", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3698395", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 2 11:05:37 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "By harnessing fundamental quantum properties, a large-scale quantum computer could undermine currently deployed public-key algorithms. The post-quantum, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "68", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Deshpande:2025:SAE, author = "Sanjay Deshpande and Yongseok Lee and Cansu Karakuzu and Jakub Szefer and Yunheung Paek", title = "{SPHINCSLET}: an Area-Efficient Accelerator for the Full {SPHINCS+} Digital Signature Algorithm", journal = j-TECS, volume = "24", number = "5", pages = "69:1--69:19", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3728469", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 2 11:05:37 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This work presents SPHINCSLET, the first fully standard-compliant and area-efficient hardware implementation of the SLH-DSA algorithm, formerly known as SPHINCS+, a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "69", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Sacchetti:2025:AFA, author = "Tommaso Sacchetti and Marton Bognar and Jesse {De Meulemeester} and Benedikt Gierlichs and Frank Piessens and Volodymyr Bezsmertnyi and Maria Chiara Molteni and Stefano Cristalli and Arianna Gringiani and Olivier Thomas and Daniele Antonioli", title = "{AttackDefense} Framework ({ADF}): Enhancing {IoT} Devices and Lifecycles Threat Modeling", journal = j-TECS, volume = "24", number = "5", pages = "70:1--70:34", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3698396", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 2 11:05:37 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Threat modeling (TM) is essential to manage, prevent, and fix security and privacy issues in our society. TM requires a data model to represent threats and tools to exploit such data. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "70", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{ElBouazzati:2025:DLH, author = "Mohamed {El Bouazzati} and Philippe Tanguy and Guy Gogniat and Russell Tessier", title = "{Diwall}: a Lightweight Host Intrusion Detection System Against Jamming and Packet Injection Attacks", journal = j-TECS, volume = "24", number = "5", pages = "71:1--71:30", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3711833", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 2 11:05:37 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The rapid growth of Internet of Things (IoT) applications in various sectors has led to a significant increase in the number of IoT devices. 
This has led to the deployment of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "71", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mahanta:2025:ADU, author = "Ashish Mahanta and Haibo Wang", title = "Abnormality Detection Using Power Rising and Descending Signature ({PRIDES})", journal = j-TECS, volume = "24", number = "5", pages = "72:1--72:18", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3711834", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 2 11:05:37 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article presents a new paradigm for abnormality detection using a novel power signature that characterizes the rising and descending patterns of energy \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "72", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wu:2025:SCA, author = "Zheng Wu and Lin Ding and Zhengting Li and Xinhai Wang and Ziyu Guan", title = "Side Channel Attacks on {GPRS} Standard Encryption Algorithms", journal = j-TECS, volume = "24", number = "5", pages = "73:1--73:16", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3716385", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 2 11:05:37 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "GEA-1 and its successor GEA-2 are stream ciphers that were selected as the General Packet Radio Service (GPRS) standard encryption algorithms, used to protect the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "73", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chen:2025:VDO, author = "Yongliang Chen and Xiaole Cui and Sunrui Zhang and Xiaoxin Cui", title = "A {VC} Dimension-Oriented Improvement Method of {PUFs} for the Anti-Modeling-Attack Capability", journal = j-TECS, volume = "24", number = "5", pages = "74:1--74:30", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3727340", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 2 11:05:37 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The physical unclonable function (PUF) serves as a security primitive of circuits, which is applicable to the embedded systems with lightweight authentication function. However, the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "74", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Gupta:2025:AAA, author = "Naina Gupta and Arpan Jati and Anupam Chattopadhyay", title = "{AI} Attacks {AI}: Recovering Neural Network Architecture from {NVDLA} Using {AI}-Assisted Side Channel Attack", journal = j-TECS, volume = "24", number = "5", pages = "75:1--75:29", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3731560", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 2 11:05:37 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "During the last decade, there has been a stunning progress in the domain of Artificial Intelligence (AI) aided by highly trained Machine Learning (ML) models. Such models are \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "75", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hao:2025:FSC, author = "Xinpeng Hao and Xiangxue Li", title = "{FirmCAN}: Sensitive {CAN} Knowledge Leakage from Automotive {ECUs}", journal = j-TECS, volume = "24", number = "5", pages = "76:1--76:24", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3711832", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 2 11:05:37 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "As de facto standards of in-vehicle network communications among various ECUs (Electronic Control Units), CAN (Controller Area Network) protocols invented by Bosch rely on the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "76", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Doglioni:2025:CAS, author = "Maria Doglioni and Eren Yildiz and Matteo Nardello and Khakim Akhunov and Kasim Sinan Yildirim and Davide Brunelli", title = "{CapDYN}: Adaptive Self-Scaling Energy Storage for Powering Batteryless {IoT}", journal = j-TECS, volume = "24", number = "5", pages = "77:1--77:32", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3737288", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 2 11:05:37 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Battery-free devices collect the harvested ambient energy in their energy storage capacitors. The size of the storage capacitor is one of the main factors affecting the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "77", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Xu:2025:TVT, author = "Liang Xu and Hongrui Song and Lan Tian and Zhongfeng Wang and Meiqi Wang", title = "{TAFP-ViT}: a Transformer Accelerator via {QKV} Computational Fusion and Adaptive Pruning for Vision Transformer", journal = j-TECS, volume = "24", number = "5", pages = "78:1--78:21", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3745028", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 2 11:05:37 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The remarkable progress of Vision Transformer (ViT) models has significantly advanced performance in computer vision tasks. However, the deployment of ViTs in \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "78", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ahmadi:2025:EAL, author = "Kasra Ahmadi and Saeed Aghapour and Mehran Mozaffari Kermani and Reza Azarderakhsh", title = "Efficient Algorithm-Level Error Detection for Number-Theoretic Transform Used for {Kyber} Assessed on {FPGAs} and {ARM}", journal = j-TECS, volume = "24", number = "5", pages = "79:1--79:23", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3762186", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 2 11:05:37 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Polynomial multiplication stands out as a highly demanding arithmetic process in the development of post-quantum cryptosystems. The importance of the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "79", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Anthimopoulos:2025:RBS, author = "Theologos Anthimopoulos and Georgios Keramidas and Vasilios Kelefouras and Iakovos Stamoulis", title = "Register Blocking: a Source-to-Source Analytical Modelling Approach for Affine Loop Kernels", journal = j-TECS, volume = "24", number = "5", pages = "80:1--80:24", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3747183", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 2 11:05:37 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Register Blocking (RB), also known as `Register-level Tiling' or `unroll-and-jam,' is a key compiler optimization for developing efficient micro-kernels. However, applying RB \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "80", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Gupta:2025:UAS, author = "Aranya Gupta and Amit Surpur and Bishnu Prasad Das and Sanjeev Manhas", title = "A Unified Approach to a Secure and Lightweight Mutual Authentication Protocol Using Pre-Characterized {COTS SRAM ICs} for {IoT} Applications", journal = j-TECS, volume = "24", number = "5", pages = "81:1--81:27", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3748328", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 2 11:05:37 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Traditional Physical Unclonable Function (PUF)-based authentication protocols are vulnerable to machine learning attacks and evolving cyber threats. Moreover, these protocols lack \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "81", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Gutierrez-Zaballa:2025:ODB, author = "Jon Guti{\'e}rrez-Zaballa and Koldo Basterretxea and Javier Echanobe", title = "Optimization of {DNN}-based {HSI} Segmentation {FPGA}-based {SoC} for {ADS}: a Practical Approach", journal = j-TECS, volume = "24", number = "5", pages = "82:1--82:27", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3748722", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 2 11:05:37 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The use of hyperspectral imaging (HSI) for autonomous navigation is a promising field of research that aims at improving the accuracy and robustness of detection, tracking, and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "82", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Banerjee:2025:PSP, author = "Debarpita Banerjee and Sumana Ghosh", title = "{P$^2$SDS}: a Polynomial-Time Pattern-Guided Stable Dynamic Scheduling for Weakly Hard Control Task Systems", journal = j-TECS, volume = "24", number = "5", pages = "83:1--83:33", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3748329", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 2 11:05:37 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Real-time scheduling of control tasks in a weakly hard system, where the tasks can miss a few of their deadlines without impeding the system's performance, is a riveting research \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "83", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Fathy:2025:TMD, author = "Mohamed Fathy and Hassan Nassar and Mohamed Abd {El Ghany} and J{\"o}rg Henkel", title = "Timekeepers: {ML}-Driven {SDF} Analysis for Power-Wasters Detection in {FPGAs}", journal = j-TECS, volume = "24", number = "5s", pages = "84:1--84:26", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3761809", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 06:40:41 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "As the integration of FPGAs into cloud computing platforms accelerates, the risk of fault injection attacks --- especially through power-wasting designs --- becomes increasingly critical. Malicious tenants can upload FPGA designs that, under specific input \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "84", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Tombesi:2025:FFI, author = "Gabriele Tombesi and Je Yang and Joseph Zuckerman and Davide Giri and William Baisi and Luca Carloni", title = "{FLIP2M}: Flexible Intra-layer Parallelism and Inter-layer Pipelining for Multi-model {AR\slash VR} Workloads", journal = j-TECS, volume = "24", number = "5s", pages = "85:1--85:27", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3762656", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 06:40:41 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Tiled accelerator architectures provide opportunities to optimize the performance of multi-model augmented and virtual reality (AR/VR) applications through intra-layer parallelism and inter-layer pipelining. 
However, balancing these two strategies is a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "85", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lai:2025:IPI, author = "Zewei Lai and Jinhui Ye and Xiaohang Wang and Zheang Fu and Amit Kumar Singh and Yingtao Jiang and Kui Ren and Mei Yang and Sihai Qiu and Xiaodong Li and Xin Tang and Jie Song and Mingzhe Zhang", title = "On Improving the Performance of Intra- and Inter-chiplet Interconnection Networks in Multi-chiplet Systems for Accelerating {FHE} Encrypted Neural Network Applications", journal = j-TECS, volume = "24", number = "5s", pages = "86:1--86:25", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3762995", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 06:40:41 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Fully Homomorphic Encryption (FHE) is regarded as a promising way to protect data privacy with encrypted computation. Due to high computation overhead, hardware based FHE accelerators were proposed to speed up FHE applications. To support complicated FHE- \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "86", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Joshi:2025:SSA, author = "Dinesh Joshi and Aritra Bagchi and Preeti Ranjan Panda", title = "{SHARP}: {SHARing}-Aware Cache Writeback {byPass}", journal = j-TECS, volume = "24", number = "5s", pages = "87:1--87:25", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3760746", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 06:40:41 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In modern multi-processor systems-on-chips (MPSoCs), writebacks from the private caches to the shared cache can introduce significant performance bottlenecks, especially because multiple threads from different co-executing programs contend for the shared \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "87", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Pal:2025:PSP, author = "Priyanjana Pal and Tara Gheshlaghi and Haibin Zhao and Michael Hefenbrock and Michael Beigl and Mehdi Tahoori", title = "{PRINT-SAFE}: Printed Ultra-Low-Cost Electronic {X}-Design with Scalable Adaptive Fault Endurance", journal = j-TECS, volume = "24", number = "5s", pages = "88:1--88:22", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3758096", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 06:40:41 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The demand for next-generation flexible electronics in applications like smart packaging and smart bandages has driven the need for cost-effective solutions. 
Traditional silicon-based electronics struggle with high costs and rigidity, making them \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "88", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Gufran:2025:GGA, author = "Danish Gufran and Sudeep Pasricha", title = "{GATE}: Graph Attention Neural Networks with Real-Time Edge Construction for Robust Indoor Localization using Mobile Embedded Devices", journal = j-TECS, volume = "24", number = "5s", pages = "89:1--89:24", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3758322", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 06:40:41 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Accurate indoor localization is crucial for enabling spatial context in smart environments and navigation systems. Wi-Fi Received Signal Strength (RSS) fingerprinting is a widely used indoor localization approach due to its compatibility with mobile \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "89", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Verma:2025:LEH, author = "Yashika Verma and Debadatta Mishra and Mainak Chaudhuri", title = "{LeakyRand}: an Efficient High-fidelity Covert Channel in Fully Associative Last-level Caches with Random Eviction", journal = j-TECS, volume = "24", number = "5s", pages = "90:1--90:29", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3761797", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 06:40:41 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Recent studies on secure last-level cache (LLC) have advocated the fully associative organization to defend against conflict-based side-channel attacks. In a fully associative LLC, an attacker cannot extract any information about the cache location of the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "90", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Gener:2025:RRI, author = "Serhan Gener and Aditya Ukarande and Shilpa {Mysore Srinivasa murthy} and Sahil Hassan and Joshua Mack and Chaitali Chakrabarti and Umit Ogras and Ali Akoglu", title = "{RIMMS}: Runtime Integrated Memory Management System for Heterogeneous Computing", journal = j-TECS, volume = "24", number = "5s", pages = "91:1--91:24", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3760257", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 06:40:41 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Efficient memory management in heterogeneous systems is increasingly challenging due to diverse compute architectures (e.g., CPU, GPU, and FPGA) and dynamic task mappings not known at compile time. 
Existing approaches often require programmers to manage \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "91", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kwon:2025:LBA, author = "Yongin Kwon and Joohyoung Cha and Sehyeon Oh and Misun Yu and Jeman Park and Jemin Lee", title = "{Luthier}: Bridging Auto-Tuning and Vendor Libraries for Efficient Deep Learning Inference", journal = j-TECS, volume = "24", number = "5s", pages = "92:1--92:23", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3759916", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 06:40:41 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Recent deep learning compilers commonly adopt auto-tuning approaches that search for the optimal kernel configuration in tensor programming from scratch, requiring tens of hours per operation and neglecting crucial optimization factors for parallel \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "92", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Jayasena:2025:FCD, author = "Aruna Jayasena and Sai Suprabhanu Nallapaneni and Prabhat Mishra", title = "{FuSS}: Coverage-Directed Hardware Fuzzing with Selective Symbolic Execution", journal = j-TECS, volume = "24", number = "5s", pages = "93:1--93:24", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3760529", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 06:40:41 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Fuzzing is a promising validation method to detect design flaws as well as security vulnerabilities in a wide variety of electronic systems. 
Traditional fuzzing methods can outperform validation using random test vectors but they can lead to a coverage \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "93", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chen:2025:ELS, author = "Szu-Wei Chen and Shuo-Han Chen", title = "Exploiting {LDPC} Syndrome for Multidimensional Hard-Decoding Read Retry on {NAND} Flash", journal = j-TECS, volume = "24", number = "5s", pages = "94:1--94:20", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3760259", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 06:40:41 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "NAND-flash-based solid-state drives (SSDs) are under constant pressure to deliver higher storage density while minimizing power and performance overhead. As the number of bits stored per NAND flash cell has scaled from single-level cells (SLC) to triple- \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "94", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Dhingra:2025:EEE, author = "Pratyush Dhingra and Chibuike Ugwu and Jana Doppa and Partha Pande", title = "{ERGo}: Energy-Efficient Hybrid Graph Neural Network Training on Processing-in-Memory Architectures", journal = j-TECS, volume = "24", number = "5s", pages = "95:1--95:25", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3760402", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 06:40:41 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Processing-in-memory (PIM) has been proposed as an alternative computing paradigm for training Deep Neural Networks, including Graph Neural Networks (GNNs). Despite these advancements, training GNN workloads on PIM devices necessitates off-chip memory \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "95", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Sharma:2025:DHP, author = "Harsh Sharma and Jana Doppa and Umit Ogras and Partha Pande", title = "Designing High-Performance and Thermally Feasible Multi-Chiplet Architectures Enabled by Non-Bendable Glass Interposer", journal = j-TECS, volume = "24", number = "5s", pages = "96:1--96:28", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3762644", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 06:40:41 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Multi-chiplet architectures enabled by glass interposer offer superior electrical performance, enable higher bus widths due to reduced crosstalk, and have lower capacitance in the redistribution layer than current silicon interposer-based systems. 
These \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "96",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Hung:2025:RPL,
  author =       "Chi-Chieh Hung and Yao-Yu Liao and Yi-Chao Shih and
                 Tseng-Yi Chen",
  title =        "{ReLoaDing} Performance: a Locality-Based Strategy for
                 Rapid Reads in Encrypted Key--Value Systems",
  journal =      j-TECS,
  volume =       "24",
  number =       "5s",
  pages =        "97:1--97:25",
  month =        nov,
  year =         "2025",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3761810",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 06:40:41 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In key-value store systems, data security is often
                 prioritized through compression and encryption of
                 stored key-value pairs, ensuring protection against
                 unauthorized access and breaches. However, these
                 security measures introduce significant performance
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "97",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Yao:2025:LSH,
  author =       "Guangliang Yao and Tsun-Yu Yang and Yingjia Wang and
                 Tseng-Yi Chen and Ming-Chang Yang",
  title =        "Large or Small: Harnessing the Erase Duality of
                 Emerging Bit-Alterable {NAND} Flash to Suppress Tail
                 Latency",
  journal =      j-TECS,
  volume =       "24",
  number =       "5s",
  pages =        "98:1--98:21",
  month =        nov,
  year =         "2025",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3762154",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 06:40:41 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "High-density NAND flash has revolutionized the storage
                 ecosystem because of its rapidly decreasing per-bit
                 costs and unprecedented capacities.
However, the inherent large block size of modern high-density NAND flash
                 inevitably aggravates the reclamation \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "98",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

%%% Buddhanoy:2025:POD -- the abstract is a truncated publisher excerpt;
%%% the stray period between the cut-off word ``page-'' and the ellipsis
%%% was a truncation artifact and has been dropped.
@Article{Buddhanoy:2025:POD,
  author =       "Matchima Buddhanoy and Aleksandar Milenkovic and
                 Sudeep Pasricha and Biswajit Ray",
  title =        "Page-Overwrite Data Sanitization in {$3$D} {NAND}
                 Flash: Challenges, Feasibility, and the {PULSE}
                 Solution",
  journal =      j-TECS,
  volume =       "24",
  number =       "5s",
  pages =        "99:1--99:26",
  month =        nov,
  year =         "2025",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3761798",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 06:40:41 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Instant data deletion (or sanitization) in NAND flash
                 devices is essential for achieving data privacy, but it
                 remains challenging due to the mismatch between erase
                 and write granularities, which leads to high overhead
                 and accelerated wear. While page- \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "99",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Kenwright:2025:TPM,
  author =       "Logan Kenwright and Partha Roop and Nathan Allen and
                 Calin Cascaval and Avinash Malik",
  title =        "{Timetide}: a Programming Model for Logically
                 Synchronous Distributed Systems",
  journal =      j-TECS,
  volume =       "24",
  number =       "5s",
  pages =        "100:1--100:25",
  month =        nov,
  year =         "2025",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3763794",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 06:40:41 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Massive strides in deterministic models have been made
                 using synchronous languages. They are mainly focused on
                 centralised applications, as the traditional approach
                 is to compile away the concurrency. Time triggered
                 languages such as Giotto and Lingua \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "100",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Huang:2025:LBC,
  author =       "Zhijie Huang and Yulong Shi and Chengjia Zhao and
                 Haoran Li and Nannan Zhao and Shujie Han and Xiao
                 Zhang",
  title =        "A Load-Balanced Collaborative Repair Algorithm for
                 Single-Disk Failures in Erasure Coded Storage Systems",
  journal =      j-TECS,
  volume =       "24",
  number =       "5s",
  pages =        "101:1--101:18",
  month =        nov,
  year =         "2025",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3762648",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 06:40:41 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In large-scale cloud data centers and distributed
                 storage systems, erasure coding is usually employed to
                 enhance data availability and storage efficiency.
However, with the explosive growth of data volume and the continuous
                 expansion of storage system \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "101",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Kumar:2025:RLB,
  author =       "Gaurav Kumar and Kushal Pravin Nanote and Sohan Lal
                 and Yamuna Prasad and Satyadev Ahlawat",
  title =        "Robust {LFSR}-based Scrambling to Mitigate Stencil
                 Attack on Main Memory",
  journal =      j-TECS,
  volume =       "24",
  number =       "5s",
  pages =        "102:1--102:22",
  month =        nov,
  year =         "2025",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3758321",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 06:40:41 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Main memory plays a pivotal role in the storage of
                 computational data in a wide range of applications,
                 including highly sensitive assets such as banking
                 transactions, cryptographic keys, and user credentials.
                 However, memory systems remain vulnerable to \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "102",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Cubero-Cascante:2025:CMD,
  author =       "Jos{\'e} Cubero-Cascante and Lucas Tonini Rosenberg
                 Schneider and Rebecca Pelke and Arunkumar Vaidyanathan
                 and Rainer Leupers and Jan Moritz Joseph",
  title =        "{CIMFlow}: Modelling Dataflow in Cross-Layer
                 Compute-in-Memory Deep Learning Accelerators",
  journal =      j-TECS,
  volume =       "24",
  number =       "5s",
  pages =        "103:1--103:22",
  month =        nov,
  year =         "2025",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3760780",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 06:40:41 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Traditional Deep Learning Accelerators (DLAs) rely on
                 off-chip memory to store large weight tensors, leading
                 to high bandwidth demands and energy consumption.
                 Compute-in-Memory (CIM) accelerators mitigate this by
                 integrating high-density, non-volatile \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "103",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Yen:2025:CND,
  author =       "Chih-Hsuan Yen and Hashan Roshantha Mendis and Tei-Wei
                 Kuo and Pi-Cheng Hsiu",
  title =        "Catch Non-determinism If You Can: Intermittent
                 Inference of Dynamic Neural Networks",
  journal =      j-TECS,
  volume =       "24",
  number =       "5s",
  pages =        "104:1--104:24",
  month =        nov,
  year =         "2025",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3757917",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 06:40:41 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Guaranteeing reliable deep neural network (DNN)
                 inference despite intermittent power is the cornerstone
                 of enabling intelligent systems in energy-harvesting
                 environments.
Existing intermittent inference approaches support static neural
                 networks with \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "104",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Liu:2025:RTM,
  author =       "Xiangfeng Liu and Zhe Jiang and Anzhen Zhu and
                 Xiaomeng Han and Mingsong Lyu and Qingxu Deng and Nan
                 Guan",
  title =        "Re-thinking Memory-Bound Limitations in {CGRAs}",
  journal =      j-TECS,
  volume =       "24",
  number =       "5s",
  pages =        "105:1--105:26",
  month =        nov,
  year =         "2025",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3760386",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 06:40:41 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Coarse-Grained Reconfigurable Arrays (CGRAs) are
                 specialized accelerators commonly employed to boost
                 performance in workloads with iterative structures.
                 Existing research typically focuses on compiler or
                 architecture optimizations aimed at improving CGRA
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "105",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ma:2025:LLB,
  author =       "Zeming Ma and Jian Zhou and Yu Fu and Xiaochang Ma and
                 Shuhan Bai and Fei Wu",
  title =        "{Lemonade}: Learning-based Heterogeneous Metadata
                 Offloading for Disaggregated Memory",
  journal =      j-TECS,
  volume =       "24",
  number =       "5s",
  pages =        "106:1--106:25",
  month =        nov,
  year =         "2025",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3761807",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 06:40:41 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Direct Access (DA) in Disaggregated Memory (DM) is a
                 promising solution that meets the high-performance
                 requirements of AI applications.
However, it lacks effective support for metadata management, making
                 metadata operations the major bottleneck. To \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "106",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Cao:2025:HHP,
  author =       "Yongxiang Cao and Hongxu Jiang and Huiyong Li and Yu
                 Tang and Dongcheng Shi and Guocheng Zhao",
  title =        "{HMSA}: High-Performance Heterogeneous Mixed-Precision
                 {CNN} Systolic Array Accelerator on {FPGA}",
  journal =      j-TECS,
  volume =       "24",
  number =       "5s",
  pages =        "107:1--107:27",
  month =        nov,
  year =         "2025",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3759458",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 06:40:41 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In power-constrained and real-time-demanding embedded
                 scenarios, Field-Programmable Gate Arrays (FPGAs)
                 emerge as ideal options for accelerating neural network
                 inference, owing to the reconfigurability, high
                 reliability, and flexibility of FPGAs. Mixed \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "107",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Fang:2025:FDE,
  author =       "Yinjie Fang and Liping Yang and Weichen Liu and
                 Guoquan Zhang and Yaoyao Gu and Xiang Xiao and Wei Qin
                 and Xiangzhen Ouyang and Wanli Chang",
  title =        "{FT-DAG}: an Efficient Full-Topology {DAG} Generator
                 with Controllable Parameters",
  journal =      j-TECS,
  volume =       "24",
  number =       "5s",
  pages =        "108:1--108:25",
  month =        nov,
  year =         "2025",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3760781",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 06:40:41 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Directed Acyclic Graph (DAG) models are extensively
                 utilized across fields such as automotive, wireless
                 communication, and deep learning, to capture the
                 inherent functional dependencies. Topology of DAG has a
                 significant impact on the performance of \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "108",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Fan:2025:GSF,
  author =       "Zimeng Fan and Min Peng",
  title =        "{GNNmap}: a Scalable Framework for {GNN} Deployment
                 through Co-Optimized Graph Partitioning and Mapping",
  journal =      j-TECS,
  volume =       "24",
  number =       "5s",
  pages =        "109:1--109:25",
  month =        nov,
  year =         "2025",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3760530",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 06:40:41 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Graph Neural Networks (GNNs) have become pivotal for
                 analyzing relational data in embedded intelligent
                 systems such as IOT devices.
However, their deployment on resource-constrained devices faces critical
                 barriers: traditional graph partitioning methods
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "109",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Kim:2025:EEA,
  author =       "Jiyong Kim and Jaeho Lee and Jiahao Lin and Alish
                 Kanani and Sun Miao and Umit Ogras and Jaehyun Park",
  title =        "{eMamba}: Efficient Acceleration Framework for {Mamba}
                 Models in Edge Computing",
  journal =      j-TECS,
  volume =       "24",
  number =       "5s",
  pages =        "110:1--110:22",
  month =        nov,
  year =         "2025",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3762190",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 06:40:41 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "State Space Model (SSM)-based machine learning
                 architectures have recently gained significant
                 attention for processing sequential data. Mamba, a
                 recent sequence-to-sequence SSM, offers competitive
                 accuracy with superior computational efficiency
                 compared \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "110",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Morillas:2025:SIS,
  author =       "Rafael Medina Morillas and Pengbo Yu and Alexandre
                 Levisse and Dwaipayan Biswas and Marina Zapater and
                 Giovanni Ansaloni and Francky Catthoor and David
                 Atienza",
  title =        "{SideDRAM}: Integrating {SoftSIMD} Datapaths near
                 {DRAM} Banks for Energy-Efficient Variable Precision
                 Computation",
  journal =      j-TECS,
  volume =       "24",
  number =       "5s",
  pages =        "111:1--111:24",
  month =        nov,
  year =         "2025",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3762641",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 06:40:41 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "By interfacing computing logic directly to the DRAM
                 banks, bank-level Compute-near-Memory (CnM)
                 architectures promise to mitigate the bottleneck at the
                 memory interconnect. While this computation paradigm
                 heavily reduces the energy requirements for data
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "111",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Alsharkawy:2025:DDK,
  author =       "Mohamed Alsharkawy and Hassan Nassar and Jeferson
                 Gonz{\'a}lez-G{\'o}mez and Xun Xiao and Osama Abboud
                 and J{\"o}rg Henkel",
  title =        "{DPReF}: Decentralized Key Generation Using
                 Physical-Related Functions",
  journal =      j-TECS,
  volume =       "24",
  number =       "5s",
  pages =        "112:1--112:24",
  month =        nov,
  year =         "2025",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3762187",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 06:40:41 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Physical Unclonable Functions (PUFs) serve as a
                 lightweight source to generate cryptographic keys
                 utilizing the inherent physical device properties,
                 making them particularly suitable for
                 resource-constrained environments such as Internet of
                 Things (IoT) \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "112",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Das:2025:DDF,
  author =       "Surajit Das and Abhijit Das and Chandan Karfa",
  title =        "Developing Deadlock-Free Routing Algorithms in Torus
                 {NoC}: a Formal Approach",
  journal =      j-TECS,
  volume =       "24",
  number =       "5s",
  pages =        "113:1--113:26",
  month =        nov,
  year =         "2025",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3762650",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 06:40:41 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Torus is a symmetric Network-on-Chip (NoC) topology
                 with uniform node degree providing very high path
                 diversity between a pair of source and destination.
Moreover, the Wraparound Channels (WCs) in the torus can significantly
                 reduce the hop count, thereby \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "113",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Agarwal:2025:SSC,
  author =       "Yatharth Agarwal and Vijay Raghunathan",
  title =        "{SecuPilot}: a Security Coprocessor-Integrated
                 Platform for Autonomous {UAV} Security",
  journal =      j-TECS,
  volume =       "24",
  number =       "5s",
  pages =        "114:1--114:25",
  month =        nov,
  year =         "2025",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3762642",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 06:40:41 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article introduces SecuPilot, a Security
                 Coprocessor-integrated platform designed to enhance the
                 resilience and operational security of autonomous
                 Unmanned Aerial Vehicles (UAVs) in increasingly
                 adversarial environments. Recognizing the critical role
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "114",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{You:2025:MHS,
  author =       "Dean You and Jieyu Jiang and Xiaoxuan Wang and Yushu
                 Du and Zhihang Tan and Wenbo Xu and Hui Wang and
                 Jiapeng Guan and Ran Wei and Shuai Zhao and Zhe Jiang",
  title =        "{MERE}: Hardware-Software Co-Design for Masking Cache
                 Miss Latency in Embedded Processors",
  journal =      j-TECS,
  volume =       "24",
  number =       "5s",
  pages =        "115:1--115:26",
  month =        nov,
  year =         "2025",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3762654",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 06:40:41 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Runahead execution is a technique to mask memory
                 latency caused by irregular memory accesses. By
                 pre-executing the application code during occurrences
                 of long-latency operations and prefetching anticipated
                 cache-missed data into the cache hierarchy, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "115",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Narang:2025:GEG,
  author =       "Gaurav Narang and Chukwufumnanya Ogbogu and Biresh
                 Kumar Joardar and Janardhan Rao Doppa and Krishnendu
                 Chakrabarty and Partha Pratim Pande",
  title =        "{GINA}: Exploiting Graph Neural Network Layer Features
                 for Energy Efficient Inferencing in {NVM}-based {PIM}
                 Accelerators",
  journal =      j-TECS,
  volume =       "24",
  number =       "5s",
  pages =        "116:1--116:26",
  month =        nov,
  year =         "2025",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3759918",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 06:40:41 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Graph Neural Networks (GNNs) are made up of multiple
                 layers, with each layer comprising of different compute
                 kernels involving weight vectors and adjacency matrices
                 of input graph dataset. These layers exhibit varying
                 features such as sparsity, storage \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "116",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Mayahinia:2025:SSB,
  author =       "Mahta Mayahinia and Tommaso Marinelli and Zhenlin Pei
                 and Hsiao-Hsuan Liu and Chenyun Pan and Zsolt Tokei and
                 Francky Catthoor and Mehdi Tahoori",
  title =        "System Scenario-Based Design of the Last-Level Cache
                 in Advanced Interconnect-Dominant Technology Nodes",
  journal =      j-TECS,
  volume =       "24",
  number =       "5s",
  pages =        "117:1--117:26",
  month =        nov,
  year =         "2025",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3762649",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 06:40:41 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Feature size reduction of the front End of the Line
                 (FEoL) and back End of the Line (BEoL) elements, i.e.,
                 transistors and interconnects, has been the main
                 enabler of the next-generation computation systems. The
                 decreasing trend of the cross-sectional \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "117",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Joshi:2025:PPG,
  author =       "Sumedh Shridhar Joshi and Hwisoo So and Soyeong Park
                 and Woobin Ko and Jinhyo Jung and Yohan Ko and Uiwon
                 Hwang and Kyoungwoo Lee and Aviral Shrivastava",
  title =        "{ProGIP}: Protecting Gradient-based Input Perturbation
                 Approaches for {OOD} Detection From Soft Errors",
  journal =      j-TECS,
  volume =       "24",
  number =       "5s",
  pages =        "118:1--118:25",
  month =        nov,
  year =         "2025",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3761796",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 06:40:41 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Undetected out-of-distribution (OOD) inputs pose a
                 significant threat to the reliability of deep learning
                 models, as they may lead to unexpected behaviors during
                 inference. Several studies have proposed effective OOD
                 input detection methods. However, soft \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "118",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chow:2025:UFP,
  author =       "Lok Yin Chow and Yingjia Wang and Yuhong Liang and
                 Ming-Chang Yang",
  title =        "Unlocking the Full Potential of Dual-Interface {SSDs}:
                 a Comprehensive Hardware and Software Perspective",
  journal =      j-TECS,
  volume =       "24",
  number =       "5s",
  pages =        "119:1--119:25",
  month =        nov,
  year =         "2025",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3762153",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 06:40:41 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The legacy block interface for I/O benefits from data
                 locality but faces challenges with I/O amplification
                 due to the frequent small read-write operations common
                 in most applications. Dual-Interface SSDs, which
                 integrate block-interface Flash memory with \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "119",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Qiao:2025:EVR,
  author =       "Haotian Qiao and Vidya Srinivas and Peter Dinda and
                 Robert Dick",
  title =        "Efficient Video Redaction at the Edge: Human Motion
                 Tracking for Privacy Protection",
  journal =      j-TECS,
  volume =       "24",
  number =       "5s",
  pages =        "120:1--120:22",
  month =        nov,
  year =         "2025",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3762994",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 06:40:41 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Computationally efficient, camera-based, real-time
                 human position tracking on low-end, edge devices would
                 enable numerous applications, including
                 privacy-preserving video redaction and analysis.
Unfortunately, running most deep neural network based models \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "120",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Modi:2025:FFA,
  author =       "Garima Modi and Priyanka Singla and Neetu Jindal and
                 Ayan Mandal and Preeti Panda",
  title =        "{FARRE}: Fairness Aware Request Response Arbitration
                 in Shared Caches",
  journal =      j-TECS,
  volume =       "24",
  number =       "5s",
  pages =        "121:1--121:26",
  month =        nov,
  year =         "2025",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3761811",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 06:40:41 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Contention in shared caches caused by concurrently
                 executing applications can lead to overall performance
                 degradation in multiprocessor systems-on-chip (MPSoCs).
                 To address this issue, various shared cache arbitration
                 techniques have been proposed to \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "121",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wang:2025:FGF,
  author =       "Ke Wang and Yingnan Zhao and Ahmed Louri",
  title =        "{FORT-GCN}: a Fault-Tolerant and Adaptive Accelerator
                 Design for Efficient Graph Convolutional Network
                 Inference",
  journal =      j-TECS,
  volume =       "24",
  number =       "5s",
  pages =        "122:1--122:26",
  month =        nov,
  year =         "2025",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3758094",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 06:40:41 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Hardware reliability has emerged as a paramount
                 concern for machine learning accelerators, as transient
                 errors and permanent failures occurring during
                 inference can severely compromise accuracy,
                 performance, and service availability. Although fault
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "122",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Rebel:2025:OOA,
  author =       "Alles Rebel and Nikil Dutt and Bryan Donyanavard",
  title =        "{OASIS}: Optimized Adaptive System for Intelligent
                 {SLAM}",
  journal =      j-TECS,
  volume =       "24",
  number =       "5s",
  pages =        "123:1--123:22",
  month =        nov,
  year =         "2025",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3761808",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 06:40:41 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Visual Simultaneous Localization and Mapping (VSLAM)
                 is essential for mobile autonomous systems operating in
                 complex dynamic environments. VSLAM algorithms are
                 computationally intensive and must execute in real-time
                 on resource-constrained embedded \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.",
  articleno =    "123",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

%%% Pottier:2025:DDH -- the second author's family name is taken to be
%%% the compound ``M{\'e}ndez Real'' (TODO: confirm against the
%%% publisher's author metadata).  She is entered in the comma form
%%% (Last, First) so that BibTeX does not split the surname, while the
%%% accent stays a brace-level-1 special character for sorting and
%%% labels.
@Article{Pottier:2025:DDH,
  author =       "Juliette Pottier and M{\'e}ndez Real, Maria and
                 Bertrand {Le Gal} and Sebastien Pillement",
  title =        "{DynHaMo}: Dynamic Hardware-Based Monitoring Dedicated
                 to Attacks Detection",
  journal =      j-TECS,
  volume =       "24",
  number =       "5s",
  pages =        "124:1--124:25",
  month =        nov,
  year =         "2025",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3762646",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 06:40:41 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Numerous attacks compromising processor security have
                 been developed over decades, including some targeting
                 the microarchitecture, such as side-channel or
                 transient attacks, or control-flow hijacking attacks.
                 As these attacks target processor \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "124",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Kanani:2025:TTA,
  author =       "Alish Kanani and Lukas Pfromm and Harsh Sharma and
                 Jana Doppa and Partha Pande and Umit Ogras",
  title =        "{THERMOS}: Thermally-Aware Multi-Objective Scheduling
                 of {AI} Workloads on Heterogeneous Multi-Chiplet {PIM}
                 Architectures",
  journal =      j-TECS,
  volume =       "24",
  number =       "5s",
  pages =        "125:1--125:26",
  month =        nov,
  year =         "2025",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3762655",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 06:40:41 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Chiplet-based integration enables large-scale systems
                 that combine diverse technologies, enabling higher
                 yield, lower costs, and scalability, making them
                 well-suited to AI workloads.
Processing-in-Memory (PIM) has emerged as a promising solution for AI
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "125",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zandigohar:2025:GHG,
  author =       "Mehrshad Zandigohar and Mallesham Dasari and Gunar
                 Schirner",
  title =        "{Grasp-HGN}: Grasping the Unexpected",
  journal =      j-TECS,
  volume =       "24",
  number =       "5s",
  pages =        "126:1--126:21",
  month =        nov,
  year =         "2025",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3762657",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 06:40:41 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "For transradial amputees, robotic prosthetic hands
                 promise to regain the capability to perform daily
                 living activities. To advance next-generation
                 prosthetic hand control design, it is crucial to
                 address current shortcomings in robustness to out of
                 lab \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "126",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{M:2025:SSI,
  author =       "Praseetha M. and Madhu Mutyam and Venkata Kalyan
                 Tavva",
  title =        "Selective Subarray Isolation for Mitigating
                 {RowHammer} Attack",
  journal =      j-TECS,
  volume =       "24",
  number =       "5s",
  pages =        "127:1--127:22",
  month =        nov,
  year =         "2025",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3762996",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 06:40:41 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "RowHammer is a severe circuit-level vulnerability in
                 DRAM-based main memories that allows attackers to flip
                 the bits stored in DRAM rows by repeatedly accessing
                 the nearby rows. Due to density scaling, newer
                 generation DRAM chips are found to be increasingly more
                 vulnerable to RowHammer attacks, motivating researchers
                 from both academia and industry to come up with new
                 RowHammer attack patterns and mitigation strategies
                 that can be widely adopted. However, the question
                 remains whether the mitigation strategies available now
                 can secure DRAM-based memory in the future.\par We
                 propose three approaches to mitigate RowHammer attacks
                 by exploiting subarray isolation. A subarray is a
                 collection of DRAM rows in a DRAM bank where each
                 subarray operates independently. In the first approach,
                 known as Subarray Isolation (SI), data from different
                 domains are allocated to separate subarrays in DRAM.
                 The SI strategy naively allocates subarrays to domains,
                 greatly hampering the bank-level parallelism in memory
                 accesses, leading to a significant performance loss.
                 The second approach, namely, Selective Subarray
                 Isolation (SSI), improves this aspect. With the SSI
                 strategy, we allocate only confidential data from
                 different domains to separate subarrays. The
                 non-confidential data of the domains will share the
                 subarrays as in the conventional case. Our evaluations
                 show that the SSI strategy performs better compared to
                 state-of-the-art mitigation strategies when the amount
                 of confidential data is less. To further improve
                 performance, we propose the third approach, namely
                 Finer Selective Subarray Isolation (FSSI), which
                 allocates separate partitions protected with guard rows
                 within a subarray to confidential data from different
                 domains.\par Our evaluations show that, of the three
                 approaches, the FSSI strategy performs the best.
                 Compared to baseline without any RowHammer protection,
                 the FSSI strategy experiences an average performance
                 drop of 0.89\% for 50\% of confidential data, but for
                 10\% and 20\% of confidential data, it shows an
                 improvement of 1.43\% and 1.28\%, respectively. We also
                 observe that the FSSI strategy is the most energy
                 efficient among the state-of-the-art RowHammer
                 mitigation techniques. Note that all our proposed
                 strategies do not incur hardware overhead for
                 performing RowHammer mitigation.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "127",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Das:2025:SVE,
  author =       "Smita Das and Amit Jana and Debdeep Mukhopadhyay",
  title =        "A Severe Vulnerability and an Effective Defense
                 Against {DFA} on {Ascon}",
  journal =      j-TECS,
  volume =       "24",
  number =       "5s",
  pages =        "128:1--128:25",
  month =        nov,
  year =         "2025",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3762192",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 06:40:41 MDT 2025",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Differential Fault Attack (DFA) is a powerful
                 cryptanalytic technique for recovering cryptographic
                 keys by exploiting computational faults. At Indocrypt
                 2024, the first DFA on Ascon was introduced using a
                 bit-flip fault model to recover a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput.
Syst.", articleno = "128", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Meshram:2025:TGM, author = "Suraj Meshram and Arnab Sarkar and Arijit Mondal", title = "A Tunable Generic Meta-Heuristic Framework for Balancing Assembly Line Systems in Manufacturing", journal = j-TECS, volume = "24", number = "5s", pages = "129:1--129:23", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3762189", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 06:40:41 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Cyber-Physical Systems controlling assembly line operations are central to manufacturing processes. Assembly line systems have diversified over time, depending on multiple factors, including the products being manufactured, the workstations and resources \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "129", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wei:2025:AAS, author = "Yi-Cheng Wei and Yi-Chieh Tsou and Yong-Cheng Chen and Li-Pin Chang", title = "App-aware Swap Resource Allocation for Enhancing User-perceived Latency on Mobile Devices", journal = j-TECS, volume = "24", number = "5s", pages = "130:1--130:24", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3760385", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 06:40:41 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "For smart mobile devices such as smartphones, daily usage involves a large set of apps and frequent app switching. App-switching latency is highly perceptible to users and is heavily subject to swap efficiency. 
Hybrid swapping, which combines compressed \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "130", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Tasche:2025:DVC, author = "Philip Tasche and Paula Herber and Marieke Huisman", title = "Deductive Verification of Cooperative {RTOS} Applications", journal = j-TECS, volume = "24", number = "5s", pages = "131:1--131:25", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3759251", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 06:40:41 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Embedded systems are used in many safety-critical domains, including in medicine, traffic, and critical infrastructure. Due to the strict timing requirements such systems usually have to fulfill, they often run on real-time operating systems (RTOS). As \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "131", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Willemsen:2025:TSP, author = "Lars Willemsen and Mario G{\"u}nzel and Bj{\"o}rn Brandenburg and Georg von der Br{\"u}ggen and Ching-Chi Lin and Jian-Jia Chen", title = "Transfer Schedulability in Periodic Real-Time Systems", journal = j-TECS, volume = "24", number = "5s", pages = "132:1--132:25", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3763236", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 06:40:41 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "We introduce and study transfer schedulability, a novel concept that describes how properties of a reference schedule derived from a scheduling algorithm $ \mathcal {A} $ are transferred onto another scheduling algorithm $ \mathcal {B} $ for a given \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "132", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kim:2025:SDS, author = "Jiwon Kim and Geon Kim and Jeho Lee and Thiemo Voigt and Hojung Cha", title = "{SecureRide}: Detecting Safety-Threatening Behavior of E-Scooters Using Battery Information", journal = j-TECS, volume = "24", number = "5s", pages = "133:1--133:24", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3758095", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 06:40:41 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Reckless usage of electric (e-) scooters causes many injury accidents, raising critical safety concerns. 
Despite newly introduced regulations, specifically, speed limits and sidewalk driving prohibitions, the number of accidents increases due to the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "133", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhao:2025:SGS, author = "Yiqi Zhao and Xinyi Yu and Bardh Hoxha and Georgios Fainekos and Jyotirmoy Deshmukh and Lars Lindemann", title = "{STL-GO}: Spatio-Temporal Logic with Graph Operators for Distributed Systems with Multiple Network Topologies", journal = j-TECS, volume = "24", number = "5s", pages = "134:1--134:23", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3760258", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 06:40:41 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Multi-agent systems (MASs) consisting of a number of autonomous agents that communicate, coordinate, and jointly sense the environment to achieve complex missions can be found in a variety of applications such as robotics, smart cities, and internet-of-things \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "134", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wang:2025:RTV, author = "Ruiqi Wang and Zichen Wang and Peiqi Gao and Mingzhen Li and Jaehwan Jeong and Yihang Xu and Yejin Lee and Carolyn Baum and Lisa Connor and Chenyang Lu", title = "Real-Time Video-Based Human Action Recognition on Embedded Platforms", journal = j-TECS, volume = "24", number = "5s", pages = "135:1--135:24", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3761795", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 06:40:41 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Advances in computer vision and deep learning have made video-based Human Action Recognition (HAR) increasingly feasible. However, running HAR on live video streams encounters significant delays on embedded platforms due to computational demands. This \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "135", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Shi:2025:CBR, author = "Yuhui Shi and Yuming Wu and Lei Bu and Xuandong Li", title = "Checking Bounded Reachability of Compositional Linear Hybrid Automata Using Interaction Relations", journal = j-TECS, volume = "24", number = "5s", pages = "136:1--136:26", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3762645", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 06:40:41 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "For compositional linear hybrid automata (CLHA), whose dynamics can be characterized by linear constraints, bounded model checking (BMC) is challenging due to the complexity caused by interactions among member automata. 
Classical BMC approaches encode \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "136", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Matsumoto:2025:EBB, author = "Tsubasa Matsumoto and Kazuki Watanabe and Kohei Suenaga and Masaki Waga", title = "Efficient Black-Box Checking with Specification-Guided Abstraction", journal = j-TECS, volume = "24", number = "5s", pages = "137:1--137:26", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3762659", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 06:40:41 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Cyber-physical systems (CPSs) often contain components whose internal design is unknown, making their verification challenging. Although black-box checking (BBC)---an automated black-box testing method that combines automata learning and model checking---can \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "137", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Naik:2025:CEL, author = "Nikhil Vijay Naik and Alessandro Pinto and Pierluigi Nuzzo", title = "Contract Embeddings for Layered Control Architectures", journal = j-TECS, volume = "24", number = "5s", pages = "138:1--138:24", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3764587", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 06:40:41 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The design of complex cyber-physical system architectures is often hierarchical. System specifications are mapped to an implementation layer via a stepwise refinement process involving multiple intermediate layers.
These layers may capture different \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "138", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Sun:2025:SSA, author = "Binqi Sun and Bohua Zou and Yigong Hu and Tomasz Kloda and Ling Wang and Tarek Abdelzaher and Marco Caccamo", title = "{SAPar}: a Surrogate-Assisted {DNN} Partitioner for Efficient Inferences on Edge {TPU} Pipelines", journal = j-TECS, volume = "24", number = "5s", pages = "139:1--139:26", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3761813", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 06:40:41 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Pipelining deep neural networks (DNNs) across multiple Edge Tensor Processing Units (TPUs) can enhance on-device performance by increasing the capacity for DNN parameters caching and enabling pipeline parallelism. Effective deployment on pipelined Edge \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "139", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chen:2025:TAN, author = "Jiale Chen and Duc Van Le and Yuanchun Li and Yunxin Liu and Rui Tan", title = "{TimelyNet}: Adaptive Neural Architecture for Autonomous Driving with Dynamic Deadline", journal = j-TECS, volume = "24", number = "5s", pages = "140:1--140:23", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3762652", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 06:40:41 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "To maintain driving safety, the execution of neural network-based autonomous driving pipelines must meet the dynamic deadlines in response to the changing environment and vehicle's velocity. To this end, this article proposes a real-time neural \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "140", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Peeck:2025:TEM, author = "Jonas Peeck and Rolf Ernst and Selma Saidi", title = "Towards Efficient Multi-Frame Clustering in Response Time Analysis for Large Object Communication", journal = j-TECS, volume = "24", number = "5s", pages = "141:1--141:26", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3758323", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 06:40:41 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In autonomous systems, growing sizes of application data, primarily related to perception tasks, have to be transmitted over communication infrastructures that provide higher data rates. Knowledge and exploitation of the clustered structure of multi-frame \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Embed. Comput. Syst.", articleno = "141", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Heider:2025:LLE, author = "Kay Heider and Christian Hakert and Kuan-Hsun Chen and Jian-Jia Chen", title = "{LazyTick}: Lazy and Efficient Management of Job Release in Real-Time Operating Systems", journal = j-TECS, volume = "24", number = "5s", pages = "142:1--142:25", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3762651", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 06:40:41 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Releasing jobs and performing scheduling decisions in real-time operating systems (RTOSes) is often realized within tick interrupts. In each tick interrupt, a set of tasks that are waiting to release new jobs, namely, the waiting set, is inspected to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "142", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{vanWanrooij:2025:SSS, author = "Joep van Wanrooij and Twan Basten and Marc Geilen", title = "Schedule Synthesis for Synchronous Dataflow Models with Lower and Upper Timing Bounds", journal = j-TECS, volume = "24", number = "5s", pages = "143:1--143:28", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3762643", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 06:40:41 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Homogeneous Synchronous DataFlow Graphs (HSDFGs) have become a popular method for analysing the performance of manufacturing systems. 
Manufacturing tasks, modelled by actor firings in an HSDFG, are bounded by their earliest possible starting times, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "143", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ma:2025:DLR, author = "Zimo Ma and Xiangzhong Luo and Qun Song and Rui Tan", title = "Dynamic Layer Routing Defense for Real-Time Embedded Vision", journal = j-TECS, volume = "24", number = "5s", pages = "144:1--144:26", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3762191", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 06:40:41 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Deep neural networks have advanced the perception and decision-making functions of smart embedded systems, such as car-borne driver assistance. Deploying these embedded neural networks often faces two challenges: (i) security vulnerabilities to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "144", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Park:2025:ETW, author = "Seonghoon Park and Jiwon Kim and Jeho Lee and Hojung Cha", title = "{Ember}: Task Wakeup Sequence-Based Energy Optimization for Mobile {Web} Browsing", journal = j-TECS, volume = "24", number = "5s", pages = "145:1--145:24", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3757918", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 06:40:41 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Existing Android systems exhibit energy inefficiency during mobile web browsing due to the lack of awareness of application-level context. 
Inferring such context from system-level data alone is challenging, but one promising opportunity is using the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "145", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Seidler:2025:WIE, author = "Maximilian Seidler and Alexander Krause and Peter Ulbrich", title = "{Wasm-IO}: Enabling Low-Level Device Interaction in {WebAssembly} for Industry Automation", journal = j-TECS, volume = "24", number = "5s", pages = "146:1--146:26", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3760387", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 06:40:41 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Certification on a component level is highly beneficial in industrial automation because it allows for independent verification and updates without compromising the reliability of the overall system. Containerization technologies naturally address this \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "146", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Melani:2025:TST, author = "Beatrice Melani and Ezio Bartocci and Michele Chiari", title = "A Tree-Shaped Tableau for Checking the Satisfiability of Signal Temporal Logic with Bounded Temporal Operators", journal = j-TECS, volume = "24", number = "5s", pages = "147:1--147:26", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3759917", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 06:40:41 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Signal Temporal Logic (STL) is a widely recognized formal specification language to express rigorous temporal requirements on mixed analog signals produced by cyber-physical systems (CPS). A relevant problem in CPS design is how to efficiently and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "147", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Banerjee:2025:FAT, author = "Debarpita Banerjee and Parasara Sridhar Duggirala and Bineet Ghosh and Sumana Ghosh", title = "A Formal Approach towards Safe and Stable Schedule Synthesis in Weakly Hard Control Systems", journal = j-TECS, volume = "24", number = "5s", pages = "148:1--148:26", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3760528", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 06:40:41 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Real-time scheduling of multiple control tasks in a weakly hard setting is an emerging research direction, as it offers a more flexible and feasible environment for task scheduling. 
This is especially pertinent for resource-constrained embedded \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "148", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chen:2025:CTS, author = "Hongkai Chen and Zeyu Zhang and Shouvik Roy and Ezio Bartocci and Scott A. Smolka and Scott Stoller and Shan Lin", title = "Cumulative-Time Signal Temporal Logic", journal = j-TECS, volume = "24", number = "5s", pages = "149:1--149:23", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3763237", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 06:40:41 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Signal Temporal Logic (STL) is a widely adopted specification language for Cyber-Physical Systems that can be used to express critical temporal requirements, such as system safety and response time. STL's expressivity, however, is not sufficient to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "149", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Lin:2025:QSS, author = "Shaokai Lin and Erling Jellum and Mirco Theile and Tassilo Tanneberger and Binqi Sun and Chadlia Jerad and Yimo Xu and Guangyu Feng and Magnus M{\ae}hlum and Jian-Jia Chen and Martin Schoeberl and Linh Thi Xuan Phan and Jeronimo Castrillon and Sanjit A. Seshia and Edward A. 
Lee", title = "Quasi-Static Scheduling for Deterministic Timed Concurrent Models on Multi-Core Hardware", journal = j-TECS, volume = "24", number = "5s", pages = "150:1--150:25", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3762653", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 06:40:41 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "To design performant, expressive, and reliable cyber-physical systems (CPSs), researchers extensively perform quasi-static scheduling for concurrent models of computation (MoCs) on multi-core hardware. However, these quasi-static scheduling approaches are \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "150", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Abotaleb:2025:CHS, author = "Abdelrhman Mohamed Abotaleb and Mohamed Hassan", title = "The Case for {HW\slash SW} Harmony in Real-Time Systems: Tightening Memory Latency of Streaming Applications", journal = j-TECS, volume = "24", number = "5s", pages = "151:1--151:27", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3762647", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 06:40:41 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Modern critical cyber-physical systems such as autonomous vehicles, drones, and real-time medical monitoring, demand not only intensive data processing but also stringent adherence to real-time performance constraints. These applications often involve \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "151", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Gupta:2025:SSB, author = "Lipsy Gupta and Pavithra Prabhakar", title = "Star-Set Based Efficient Reachable Set Computation of Anytime Sensing-Based Neural Network-Controlled Dynamical Systems", journal = j-TECS, volume = "24", number = "5s", pages = "152:1--152:20", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3762658", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 06:40:41 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this article, we consider the problem of reachable set computation of a closed-loop system with anytime sensor and a neural network controller. We provide a star set data structure-based forward propagation algorithm that uses existing efficient \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "152", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Eisenklam:2025:RRA, author = "Abigail Eisenklam and Robert Gifford and Georgiy A. Bondar and Yifan Cai and Tushar Sial and Linh Thi Xuan Phan and Abhishek Halder", title = "{Rasco}: Resource Allocation and Scheduling Co-design for {DAG} Applications on Multicore", journal = j-TECS, volume = "24", number = "5s", pages = "153:1--153:27", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3761814", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 06:40:41 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "As multicore hardware becomes increasingly prevalent in real-time embedded systems, traditional scheduling techniques that assume a single worst-case execution time for each task are no longer adequate, as they fail to account for the impact of shared \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "153", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mandal:2025:DPC, author = "Dipankar Mandal and Arnab Sarkar and Arijit Mondal", title = "A Discrete Partial Charging Enabled Dynamic Programming Strategy for Optimal Fixed-Route Electric Vehicle Charging", journal = j-TECS, volume = "24", number = "5s", pages = "154:1--154:25", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3762188", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 06:40:41 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The rapid adoption of Electric Vehicles (EVs), driven by stringent environmental regulations and rising fuel costs, is reshaping the landscape of Vehicle Routing Problems (VRP).
This shift has led to the Electric Vehicle Routing Problem (EVRP), which \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "154", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Subramaniyan:2025:FGF, author = "Srinivasan Subramaniyan and Xiaorui Wang", title = "{FC-GPU}: Feedback Control {GPU} Scheduling for Real-time Embedded Systems", journal = j-TECS, volume = "24", number = "5s", pages = "155:1--155:25", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3761812", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Mon Oct 6 06:40:41 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "GPUs have recently been adopted in many real-time embedded systems. However, existing GPU scheduling solutions are mostly open-loop and rely on the estimation of worst-case execution time (WCET). Although adaptive solutions, such as feedback control \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "155", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Chakrabarty:2025:VRD, author = "Yashashwee Chakrabarty and Akanksha Dixit and Smruti R. Sarangi", title = "{VoxDepth}: Rectification of Depth Images on Edge Devices", journal = j-TECS, volume = "24", number = "6", pages = "156:1--156:27", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3763793", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Nov 4 11:17:51 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Autonomous mobile robots like self-flying drones and industrial robots heavily depend on depth images to perform tasks such as 3D reconstruction and visual SLAM. 
\ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "156", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhang:2025:BCI, author = "Qizhi Zhang and Ya Gao and Haocheng Ma and Jiaji He and Yiqiang Zhao and Xiaolong Guo", title = "Boosting Cryptographic {ICs'} Side-Channel Resistance: a Formal Framework for Automatic Identification and Protection of Leaky Paths", journal = j-TECS, volume = "24", number = "6", pages = "157:1--157:25", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3768154", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Nov 4 11:17:51 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Side-channel analysis (SCA) attacks pose a significant threat to cryptographic integrated circuits (ICs). While designers have endeavored to introduce various countermeasures \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "157", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mondal:2025:DQL, author = "Mainak Mondal and Fei Dou and Jinbo Bi and Song Han", title = "Deep {Q}-Learning-Based Mobile Charger Path Planning in Wireless Powered Communication Networks", journal = j-TECS, volume = "24", number = "6", pages = "158:1--158:32", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3763235", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Nov 4 11:17:51 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Wireless Powered Communication Network (WPCN) is a new paradigm to allow low-power wireless devices to exchange data packets and receive stable energy transfer from a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "158", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Akhunov:2025:ACM, author = "Khakim Akhunov and Kasim Sinan Yildirim and Jongouk Choi and Changhee Jung", title = "Adaptive Computing in Memory Meets Conventional Batteryless Platforms", journal = j-TECS, volume = "24", number = "6", pages = "159:1--159:26", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3765623", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Nov 4 11:17:51 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Computing In-Memory (CIM) with emerging nonvolatile memory (NVM) technologies is promising for batteryless systems since it removes the need for explicit backup \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "159", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Hu:2025:PSA, author = "Xianghong Hu and Chaoming Yang and Xueming Li and Rongfeng Li and Yuanmiao Lin and Shansen Fu and Hongmin Huang and Shuting Cai and Xiaoming Xiong", title = "A Precision-Scalable Accelerator with Sign-Magnitude Representation and Dual Adder Trees", journal = j-TECS, volume = "24", number = "6", pages = "160:1--160:26", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3767336", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Nov 4 11:17:51 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Currently, there are two mainstream acceleration methods; one is mixed precision and the other is sparsity. Few accelerators support both mixed precision and sparsity, and most enable \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "160", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kang:2025:FMR, author = "Woosuk Kang and EunJin Jeong and Kyonghwan Yoon and Soonhoi Ha", title = "A Framework for Multi-Robot Programming: From High-Level Specification to Retargetable Deployment", journal = j-TECS, volume = "24", number = "6", pages = "161:1--161:45", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3747325", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Nov 4 11:17:51 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In addition to the various requirements that a multi-robot framework should meet, swarm robotics applications also demand robustness, flexibility, and scalability. While several \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "161", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Abraham:2025:RAE, author = "Danny Abraham and Biswadip Maity and Bryan Donyanavard and Nikil Dutt", title = "Runtime Adaptivity for Efficient Neural Network Inference on Autonomous Systems", journal = j-TECS, volume = "24", number = "6", pages = "162:1--162:25", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3762640", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Nov 4 11:17:51 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Neural network pruning and dynamic training have emerged as key techniques for optimizing deep learning models to meet the constraints of resource-limited systems. However, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "162", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Sarwar:2025:EIW, author = "Mir Md Sajid Sarwar and Rajarshi Ray", title = "Exploring Inevitable Waypoints for Unsolvability Explanation in Hybrid Planning Problems", journal = j-TECS, volume = "24", number = "6", pages = "163:1--163:20", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3767745", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Nov 4 11:17:51 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Explaining unsolvability of planning problems is of significant research interest in Explainable AI Planning. A number of research efforts on generating explanations of solutions to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "163", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mishra:2025:SSD, author = "Vishesh Mishra and Dipesh and Sparsh Mittal and Urbi Chatterjee", title = "{SATGuard}: {SAT}-driven Countermeasures for Protecting Approximate Circuits from Hardware {Trojan}", journal = j-TECS, volume = "24", number = "6", pages = "164:1--164:29", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3766894", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Nov 4 11:17:51 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Approximate arithmetic circuits have gained prominence in modern computing systems due to their ability to trade accuracy for improved performance and energy efficiency. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "164", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Agarwal:2025:FDA, author = "Ayushi Agarwal and Pulkit Goel and P. J. Joseph and Prokash Ghosh and Sourav Roy and Preeti Ranjan Panda", title = "{FLASH}: Deadline-Aware Flexible {LLC} Arbitration and Scheduling for Hardware Accelerators", journal = j-TECS, volume = "24", number = "6", pages = "165:1--165:34", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3757742", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Nov 4 11:17:51 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Integrating domain-specific hardware accelerators on modern systems on chips (SoCs) has enabled complex applications, such as vision, natural language processing, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "165", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Burns:2025:SFM, author = "Alan Burns and Cliff Jones", title = "A Specification Framework for Mixed-Criticality Scheduling Protocols", journal = j-TECS, volume = "24", number = "6", pages = "166:1--166:24", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3765522", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Nov 4 11:17:51 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "This article presents a general formal framework for describing the relationship between a criticality-aware scheduler, a set of application jobs that are assigned different \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "166", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhang:2025:OSM, author = "Huachen Zhang and Jianyang Ding and Bowen Jiang and Tianshuo Lu and Wei Xu and Zhilei Chai", title = "Optimizing Sparse Matrix Convolution on {RISC-V} Core: Custom Instructions for Embedded System", journal = j-TECS, volume = "24", number = "6", pages = "167:1--167:23", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3756322", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Nov 4 11:17:51 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/risc-v.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "With the increasing demand for deep neural network (DNN) inference tasks on embedded platforms, deploying compute-intensive DNNs on resource-constrained embedded \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "167", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kim:2025:OAG, author = "Hyunjun Kim and Hwajeong Seo", title = "Optimizing {AES-GCM} on 32-Bit {ARM Cortex-M4} Microcontrollers: Fixslicing and {FACE}-Based Approach", journal = j-TECS, volume = "24", number = "6", pages = "168:1--168:24", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3766074", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Nov 4 11:17:51 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Advanced Encryption Standard (AES) in Galois/Counter Mode (GCM) delivers both confidentiality and integrity, yet poses performance and security challenges \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "168", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Park:2025:DAS, author = "Seonghoon Park and Junick Ahn and Daeyong Kim and Hojung Cha", title = "Duration-Aware Sound Event Detection on Ultra-Low-Power Sensor Devices", journal = j-TECS, volume = "24", number = "6", pages = "169:1--169:26", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3761806", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Nov 4 11:17:51 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Sound event detection (SED) based on on-device machine learning (ML) presents considerable energy challenges for ultra-low-power sensor devices. In this article, we propose \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "169", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Shukla:2025:FMT, author = "Nikhil Shukla and Kai Ni and Sam Stevenson and Vijaykrishnan Narayanan", title = "Ferroelectric Memory Technology for Big Data Applications", journal = j-TECS, volume = "24", number = "6", pages = "170:1--170:20", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3764868", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Nov 4 11:17:51 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Big Data has an insatiable appetite for larger and better-performing memory. While current memory technologies continue to advance, the performance gaps in current memory \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "170", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Anthimopoulos:2025:OTT, author = "Theologos Anthimopoulos and Milad Kokhazadeh and Vasilios Kelefouras and Benjamin Himpel and Georgios Keramidas", title = "Optimizing Tensor Train Decomposition in {DNNs} for {RISC-V} Architectures Using Design Space Exploration and Compiler Optimizations", journal = j-TECS, volume = "24", number = "6", pages = "171:1--171:34", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3768624", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Nov 4 11:17:51 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/risc-v.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Deep neural networks (DNNs) have become indispensable in many real-life applications like natural language processing, and autonomous systems. However, deploying DNNs on \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "171", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Valente:2025:NHS, author = "Giacomo Valente and Vittoriano Muttillo and Luigi Pomante and Daniele Frigioni and Tania {Di Mascio}", title = "A New {HW\slash SW} Co-Design Approach for Monitored Systems-on-Chip Development", journal = j-TECS, volume = "24", number = "6", pages = "172:1--172:37", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3769075", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Nov 4 11:17:51 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "As embedded systems are required to satisfy increasing functional and non-functional requirements, heterogeneous systems-on-chip architectures are progressively \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "172", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Klashtorny:2025:OSP, author = "Artem Klashtorny and Mahesh Tripunitara and Hiren Patel", title = "Optimal Split Point Placement for Predictable {GPU} Wavefront Splitting", journal = j-TECS, volume = "24", number = "6", pages = "173:1--173:25", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3769118", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Nov 4 11:17:51 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Predictable wavefront splitting (PWS) is an optimization technique for graphics processing units (GPUs) to address the performance and worst-case execution time (WCET) \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "173", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Ji:2026:HPA, author = "Xiaoyu Ji and Cheng Chen and Gangqiang Yang and Hongchao Zhou and Hailiang Xiong and Xianye Ben and Zhiguo Wan", title = "High-Performance Accelerator for Constant-Time Cross-Domain Integer and {Montgomery} Inversion on {FPGA}", journal = j-TECS, volume = "25", number = "1", pages = "1:1--1:26", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3777365", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 20 07:06:14 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Modular Inversion (MI) is one of the fundamental arithmetic operations in the finite field, which plays an essential role in various cryptographic applications and requires high performance and security. Unfortunately, the simple MI algorithm is vulnerable to side-channel attacks, such as the timing attack, which can compromise the cryptographic system by analyzing the time taken to execute cryptographic algorithms. Attackers may recover the initial data since the time can differ based on the input. Besides, the low complexity and low resource consumption of hardware implementations in MI are also challenging. In this article, we propose two novel modular inversion algorithms, named Constant-Time Integer Modular Inversion (CT-IMI) and Constant-Time Complementary Montgomery Modular Inversion (CT-CMMI). They both consist of constant iteration rounds to resist the timing attack. CT-IMI processes the data in the integer field, which is designed for common scenarios. CT-CMMI is suitable for the cross-domain case, which can directly use data in the Montgomery domain and avoid the conversion steps for some specific applications, e.g., scalar multiplication in Elliptic Curve Cryptography (ECC). 
In software simulations, we measure the average clock cycles for a single inversion and illustrate the relationship between various bit lengths and the latency. The significant differences between constant and non-constant algorithms demonstrate the vulnerability of modular inversion to timing attacks. In addition, we design two efficient hardware architectures on FPGA. Experimental results show that our CT-IMI can finish a single inversion in 2.56 $ \mu $ s with 4.2k LUTs, 1.8k FFs, and our CT-CMMI requires 2.45 $ \mu $ s with 2.7k LUTs, 1.6k FFs. The product of area and latency of our CT-IMI and CT-CMMI can reach 10.50 and 6.62, respectively, which shows optimal performance compared with all the results in the existing literature.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "1", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Rajappa:2026:OHE, author = "Anuj Justus Rajappa and Laura Smets and Philippe Reiter and Paolo Rech and Ynte Vanderhoydonc and Ritesh Kumar Singh and Siegfried Mercelis and Jeroen Famaey", title = "Optimized Hyperdimensional Edge {AI} Evaluation for Efficiency and Reliability under Real Radiation", journal = j-TECS, volume = "25", number = "1", pages = "2:1--2:24", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3773032", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 20 07:06:14 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Hyperdimensional Computing (HDC) is an emerging AI algorithm, touted to be an efficient, neuro-inspired and reliable alternative to neural networks for Edge AI. HDC utilizes hypervectors with several thousand elements; the number of elements in these \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput.
Syst.", articleno = "2", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhang:2026:CQA, author = "Guanglin Zhang and Yuhao Zhang and Xiaowen Huang and Wenqian Zhang", title = "Coinf: {QoS}-aware {DRL}-based Inference Task Scheduling Framework with Batching Processing", journal = j-TECS, volume = "25", number = "1", pages = "3:1--3:20", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3777373", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 20 07:06:14 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The emergence of deploying Deep neural network (DNN) services on edge servers has spurred research into efficiently provisioning inference services. However, previous studies have neglected to consider the implications of different types of DNN and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "3", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Tessler:2026:NOC, author = "Corey Tessler and Venkata Prashant Modekurthy and Nathan Fisher and Abusayeed Saifullah and Alleyn Murphy", title = "Near-Optimal Cache Sharing through Co-Located Parallel Scheduling of Threads", journal = j-TECS, volume = "25", number = "1", pages = "4:1--4:28", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3770858", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 20 07:06:14 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "For hard-real time systems, cache memory increases execution time variability, increasing the complexity of timing analysis. As such, cache memory is often treated exclusively as a detractor to schedulability. 
Cache-aware co-located scheduling aims at \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "4", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Tehseen:2026:SCS, author = "Muhammad Danish Tehseen and Gyeongcheol Shin and Joo-Young Kim and Youjip Won", title = "{SeeSSD}: Computational Storage for Energy-Efficient Real-Time Object Detection", journal = j-TECS, volume = "25", number = "1", pages = "5:1--5:27", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3774649", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 20 07:06:14 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In this work, we present our intelligent SSD, SeeSSD, an energy-efficient computational SSD for a real-time object detection system. SeeSSD embeds an FPGA-based CNN processing engine and the firmware that performs the convolutional operation on the target \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "5", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Bernardi:2026:VSC, author = "Paolo Bernardi and Giorgio Insinga and Matteo Battilana and Peter Beer and Giambattista Carnevale and Matteo Coppetta and Nellina Mautone and Alberto Repele and Pierre Scaramuzza and Rudolf Ullmann", title = "A Versatile Strategy for Comprehensive Data Collection and Retention in Embedded {SoC} Memories", journal = j-TECS, volume = "25", number = "1", pages = "6:1--6:15", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3766550", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 20 07:06:14 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In modern automotive system-on-chip (SoC) designs, large embedded flash memories have become a standard feature. Since they occupy a significant percentage of the die area, their impact on the SoCs' overall yield is substantial, making them a critical \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "6", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Su:2026:GBP, author = "Yu-Zheng Su and Huan-Chun Yeh and Chun-Han Lin", title = "Guidance-based Power Conservation Framework for User-interface Developers on Mobile Devices", journal = j-TECS, volume = "25", number = "1", pages = "7:1--7:20", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3776742", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 20 07:06:14 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Mobile applications have been seamlessly integrated into our daily lives. 
When using mobile devices, the energy efficiency of these applications plays a pivotal role in enhancing the user experience. However, it is noteworthy that incorporating power \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "7", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Liedtke:2026:EEB, author = "Lukas Liedtke and Per Gunnar Kjeldsberg and Frank Alexander Kraemer and Magnus Jahre", title = "{EStacker}: Explaining Battery-Less {IoT} System Performance with Energy Stacks", journal = j-TECS, volume = "25", number = "1", pages = "8:1--8:28", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3772371", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 20 07:06:14 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The number of Internet of Things (IoT) devices is increasing exponentially, and it is environmentally and economically unsustainable to power all these devices with batteries. The key alternative is energy harvesting, but battery-less IoT systems require \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "8", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Callanan:2026:SCS, author = "Gareth Callanan and Flavius Gruian", title = "Specifying and Compiling Scalable Networks of Actors for Software and Hardware Platforms", journal = j-TECS, volume = "25", number = "1", pages = "9:1--9:26", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3774886", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 20 07:06:14 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Streaming applications are often described using dataflow actor models with a fixed network structure, allowing for static analysis and efficient hardware implementation. However, this fixed structure hinders scalability and design space exploration. This \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "9", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Zhou:2026:SHS, author = "Xinbing Zhou and Shaobo Shi and Shaohan Liu and Peng Hao and Yunxiang Tang and Tiancheng Tang and Yi Man and Dake Liu", title = "{Sayram}: a Hardware-software Co-design to Accelerate Wireless Baseband Processing", journal = j-TECS, volume = "25", number = "1", pages = "10:1--10:22", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3776744", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 20 07:06:14 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Micro base stations, with limited antennas and extensive deployment, require scaled-down hardware. 
Software-defined radio solutions (e.g., CPU, many-core systems, GPU) offer flexibility but incur high area and power costs, while traditional DSP lacks \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "10", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Sarma:2026:SSM, author = "Richa Sarma and Sanjay Moulik", title = "{SAMIT}: Secure Multi-Authority Access Control with Dynamic Attribute Updates for Embedded {IoT-CPS}", journal = j-TECS, volume = "25", number = "1", pages = "11:1--11:23", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3774757", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 20 07:06:14 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "In today's world, Cyber-Physical Systems (CPS) play a key role in areas like smart homes, transportation, and healthcare, where lightweight IoT devices are used to monitor and control activities. As the number of IoT devices keeps growing, so does the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "11", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Afifi:2026:AST, author = "Salma Afifi and Oluwaseun Alo and Ishan Thakkar and Sudeep Pasricha", title = "{ASTRA}: a Stochastic Transformer Neural Network Accelerator with Silicon Photonics", journal = j-TECS, volume = "25", number = "1", pages = "12:1--12:24", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3769092", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 20 07:06:14 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Transformers have emerged as a dominant architecture in deep learning, demonstrating unparalleled success across a wide range of applications, including natural language processing (NLP), computer vision (CV), and scientific computing. By leveraging the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "12", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Bhattacharjya:2026:AAA, author = "Rajat Bhattacharjya and Arnab Sarkar and Ish Kool and Sabur Baidya and Nikil Dutt", title = "{ACCESS-AV}: Adaptive Communication-Computation Codesign for Sustainable Autonomous Vehicle Localization in Smart Factories", journal = j-TECS, volume = "25", number = "1", pages = "13:1--13:24", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3771770", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 20 07:06:14 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Autonomous Delivery Vehicles (ADVs) are increasingly used for transporting goods in 5G network-enabled smart factories, with the compute-intensive localization module presenting a significant opportunity for optimization. 
We propose ACCESS-AV, an energy- \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "13", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Wang:2026:SDM, author = "Jiajie Wang and Saumya Shankar and Partha S. Roop", title = "{Softtide}: a Deterministic Middleware for Real-Time Systems", journal = j-TECS, volume = "25", number = "1", pages = "14:1--14:25", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3774891", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 20 07:06:14 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Correct synchronisation in a distributed system is difficult. One effective approach to the problem is to employ a logical clock on the high-level design, which ensures deterministic concurrency. However, most real-time network protocols only provide \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "14", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Kaneko:2026:SCS, author = "Hayata Kaneko and Ryuto Ishibashi and Lin Meng", title = "{SIMD-CP}: {SIMD} with Redundant Bits Compression and Mixed-Precision Packing for Quantized {DNNs}", journal = j-TECS, volume = "25", number = "1", pages = "15:1--15:20", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3771939", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 20 07:06:14 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Deploying deep neural networks (DNNs) on edge devices presents notable challenges, including execution time, power consumption, and memory footprint.
To address these limitations, the co-design of software-based model compression techniques and dedicated \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "15", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Mishra:2026:DMR, author = "Vishesh Mishra and Mahendra Rathor and Urbi Chatterjee", title = "Dual-Mode Rounding Algorithms and Hardware for Posit-Based {DNN} Training: The Future of Mixed Precision Frameworks", journal = j-TECS, volume = "25", number = "1", pages = "16:1--16:26", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3772092", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 20 07:06:14 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The Posit number system provides a promising alternative to traditional floating-point (FP) formats for deep neural network (DNN) training by offering tapered precision and a wide dynamic range, addressing key limitations of conventional FP formats. While recent research has demonstrated the advantages of Posit-enabled training and inference for fixed-precision applications, the development of mixed-precision frameworks has been hindered by the absence of rounding algorithms for transitioning between Posit formats. This dependency has limited the practical adoption of Posits in DNN workflows. In this article, we present a Posit-based Mixed Precision Training and Inference (PMP) framework, leveraging Posit32, Posit16, and Posit8 for distinct computational stages. Posit32 ensures numerical stability in critical operations, Posit16 balances precision and efficiency for intermediate computations, and Posit8 significantly reduces memory usage during inference. 
Specifically, we introduce algorithms for converting Posit32 representations into Posit16 and Posit8, and vice versa, under two rounding modes: deterministic and stochastic. Stochastic rounding is employed to mitigate precision loss in low-precision arithmetic. Furthermore, we propose a hardware-efficient Posit Multiply-Accumulate (pMAC) Unit that integrates deterministic and stochastic rounding modules, enabling efficient mixed-precision computations. We validate our framework on ResNet-18, ResNet-50, ResNet-152, MobileNet-v2, VGG-16, and EfficientNet-B7 (trained on ImageNet), YOLOv2 (trained on PASCAL VOC 2012), and BERT (trained on WikiText-2). Experimental results demonstrate up to $ 1.5 \times $ training speedup with Posit16-based PMP framework and up to $ 6.5 \times $ training speedup with Posit8-based PMP framework when compared with fixed-precision FP32 training, while maintaining comparable or superior accuracy. Moreover, hardware results show that the design overhead of integrating proposed deterministic and stochastic rounding modules with the pMAC unit is estimated to be around 4.6\% only.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. 
Syst.", articleno = "16", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", } @Article{Bhattacharya:2026:APE, author = "Akash Bhattacharya and Sunandan Adhikary and Ipsita Koley and Vivek Loya and Soumyajit Dey", title = "Adaptive Parameterisation for Efficient Detection of False Data Injections", journal = j-TECS, volume = "25", number = "1", pages = "17:1--17:30", month = jan, year = "2026", CODEN = "????", DOI = "https://doi.org/10.1145/3777903", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Tue Jan 20 07:06:14 MST 2026", bibsource = "https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Increasing interconnectivity in modern safety-critical cyber-physical systems (CPSs) renders them susceptible to attacks like false data injection (FDI). Due to computation and communication resource constraints, it is infeasible to encrypt all data \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Embed. Comput. Syst.", articleno = "17", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "https://dl.acm.org/loi/tecs", }