%%% -*-BibTeX-*- %%% ==================================================================== %%% BibTeX-file{ %%% author = "Nelson H. F. Beebe", %%% version = "3.257", %%% date = "05 March 2025", %%% time = "07:43:08 MST", %%% filename = "pvm.bib", %%% address = "University of Utah %%% Department of Mathematics, 110 LCB %%% 155 S 1400 E RM 233 %%% Salt Lake City, UT 84112-0090 %%% USA", %%% telephone = "+1 801 581 5254", %%% URL = "https://www.math.utah.edu/~beebe", %%% checksum = "49962 79323 350087 3713516", %%% email = "beebe at math.utah.edu, beebe at acm.org, %%% beebe at computer.org (Internet)", %%% codetable = "ISO/ASCII", %%% keywords = "bibliography; BibTeX; CUDA (Compute Unified %%% Device Architecture); MPI; Message Passing %%% Interface; NVIDIA; OpenCL (Open Computing %%% Language); OpenMP; PVM; Parallel Virtual %%% Machine", %%% license = "public domain", %%% supported = "yes", %%% docstring = "This is a bibliography of publications about %%% PVM (Parallel Virtual Machine) software, and %%% its close relative, MPI (Message Passing %%% Interface). %%% %%% Publications about OpenMP are also included %%% from version 2.00, because OpenMP directives %%% for parallelization in a shared-memory %%% environment are often combined with use of %%% PVM or MPI across distributed-memory systems. %%% %%% Publications about NVIDIA's CUDA programming %%% environment, and about OpenCL, are included %%% from version 3.00. %%% %%% MPI and OpenMP publications may later be %%% split off into a separate bibliography if %%% they prove numerous enough. 
%%% %%% At version 3.257, the year coverage looked %%% like this: %%% %%% 1989 ( 3) 2001 ( 145) 2013 ( 68) %%% 1990 ( 4) 2002 ( 92) 2014 ( 78) %%% 1991 ( 14) 2003 ( 50) 2015 ( 80) %%% 1992 ( 30) 2004 ( 31) 2016 ( 87) %%% 1993 ( 99) 2005 ( 65) 2017 ( 99) %%% 1994 ( 196) 2006 ( 32) 2018 ( 73) %%% 1995 ( 251) 2007 ( 44) 2019 ( 107) %%% 1996 ( 195) 2008 ( 50) 2020 ( 83) %%% 1997 ( 124) 2009 ( 44) 2021 ( 62) %%% 1998 ( 87) 2010 ( 66) 2022 ( 20) %%% 1999 ( 116) 2011 ( 62) %%% 2000 ( 123) 2012 ( 145) %%% 19xx ( 2) %%% %%% Article: 1902 %%% Book: 37 %%% InCollection: 3 %%% InProceedings: 579 %%% Manual: 1 %%% MastersThesis: 16 %%% Misc: 9 %%% PhdThesis: 2 %%% Proceedings: 228 %%% TechReport: 50 %%% %%% Total entries: 2827 %%% %%% More information about PVM and MPI can be %%% found on the World-Wide Web at these %%% locations: %%% %%% https://www.math.utah.edu/pub/tex/bib/pvm.bib (this file) %%% ftp://math.usfca.edu/pub/MPI/mpi.guide.ps %%% http://lovelace.nas.nasa.gov/Parallel/SP2/MPIPerf/report.html %%% http://www.arc.unm.edu/workshop/mpi/mpi.html %%% http://www.epm.ornl.gov/~walker/mpi/SLIDES/mpi-tutorial.html %%% http://www.mcs.anl.gov/mpi/index.html (MPI home page) %%% http://www.netlib.org/utk/papers/intro-mpi/intro-mpi.html %%% http://www.osc.edu/Lam/mpi/mpi_tut.html %%% http://www.usi.utah.edu/user_guides/spug/ %%% news:comp.parallel.mpi %%% http://www-unix.mcs.anl.gov/mpi/index.html %%% %%% The last of these contains pointers to %%% online versions of the official MPI %%% standards documents. 
%%% %%% This bibliography was collected from %%% multiple sources: %%% %%% * the authors' own files; %%% * the TeX User Group bibliography %%% collection on ftp.math.utah.edu in %%% /pub/tex/bib; %%% * the very large Computer Science %%% bibliography collection on ftp.ira.uka.de %%% in /pub/bibliography, to which many people %%% have contributed; %%% * Internet library catalogs, including %%% University of California MELVYL, Stanford %%% University RLIN, Library of Congress, %%% OCLC; %%% * Zentralblatt fur Mathematik Mathematics %%% Abstracts (http://www.emis.de/cgi-bin/MATH/); %%% * the OCLC WorldCat, Contents1st, Article1st, %%% Papers1st, and Proceedings databases; %%% * the IEEE INSPEC databases %%% (1989--1997); %%% * the UnCover database. %%% %%% BibTeX citation tags are uniformly chosen %%% as name:year:abbrev, where name is the %%% family name of the first author or editor, %%% year is a 4-digit number, and abbrev is a %%% 3-letter condensation of important title %%% words. Citation tags were automatically %%% generated by software developed for the %%% BibNet Project. %%% %%% In this bibliography, entries are sorted %%% first by ascending year, and within each %%% year, alphabetically by author or editor, %%% and then, if necessary, by the 3-letter %%% abbreviation at the end of the BibTeX %%% citation tag, using the ``bibsort -byyear'' %%% utility. %%% %%% The checksum field above contains a CRC-16 %%% checksum as the first value, followed by the %%% equivalent of the standard UNIX wc (word %%% count) utility output of lines, words, and %%% characters. 
This is produced by Robert
%%% Solovay's checksum utility.",
%%% }
%%% ====================================================================
%%% LaTeX preamble: hyphenation exceptions, plus guarded fallback
%%% definitions of the macros used by strings and entries in this file.
%%% Each \ifx \undefined test leaves any definition already supplied by
%%% the citing document untouched.  \booktitle is defined once only (a
%%% second identical guard was redundant), and \emdash, which the j-SPE
%%% journal string uses, has a fallback so that it cannot be undefined.
@Preamble{
    "\hyphenation{ Cor-vi Dough-er-ty Jo-seph Nov-em-ber }"
  # "\ifx \undefined \booktitle \def \booktitle #1{{{\em #1}}} \fi"
  # "\ifx \undefined \circled \def \circled #1{(#1)}\fi"
  # "\ifx \undefined \emdash \def \emdash {---}\fi"
  # "\ifx \undefined \reg \def \reg {\circled{R}}\fi"
  # "\ifx \undefined \TM \def \TM {${}^{\sc TM}$} \fi"
}

%%% ====================================================================
%%% Acknowledgement abbreviations:
@String{ack-nhfb = "Nelson H. F. Beebe,
                    University of Utah,
                    Department of Mathematics, 110 LCB,
                    155 S 1400 E RM 233,
                    Salt Lake City, UT 84112-0090, USA,
                    Tel: +1 801 581 5254,
                    e-mail: \path|beebe@math.utah.edu|,
                            \path|beebe@acm.org|,
                            \path|beebe@computer.org| (Internet),
                    URL: \path|https://www.math.utah.edu/~beebe/|"}

%%% ====================================================================
%%% Institution abbreviations:
@String{inst-ANL-mcs = "Mathematics and Computer Science Division,
                       Argonne National Laboratory"}
@String{inst-ANL:adr = "9700 South Cass Avenue, Argonne, IL 60439-4801, USA"}

@String{inst-CERFACS = "CERFACS"}
@String{inst-CERFACS:adr = "Toulouse, France"}

@String{inst-EMORY = "Emory University"}
@String{inst-EMORY:adr = "Atlanta, GA, USA"}

@String{inst-IBM-WATSON = "IBM T. J.
Watson Research Center"}
@String{inst-IBM-WATSON:adr = "Yorktown Heights, NY, USA"}

@String{inst-MSU = "Mississippi State University"}
@String{inst-MSU:adr = "Starkville, MS, USA"}

@String{inst-NLRC = "NASA Langley Research Center"}
@String{inst-NLRC:adr = "Hampton, VA, USA"}

%%% Oak Ridge National Laboratory is located in Oak Ridge, TN (not
%%% Knoxville); this macro supplies the address of the ORNL reports
%%% and manuals in this file.
@String{inst-ORNL = "Oak Ridge National Laboratory"}
@String{inst-ORNL:adr = "Oak Ridge, TN, USA"}

@String{inst-SCS-CMU = "School of Computer Science,
                       Carnegie Mellon University"}
@String{inst-SCS-CMU:adr = "Pittsburgh, PA, USA"}

@String{inst-UAL-EE = "Department of Electrical Engineering,
                      University of Alabama"}
@String{inst-UAL-EE:adr = "Tuscaloosa, AL, USA"}

@String{inst-UGA = "University of Georgia"}
@String{inst-UGA:adr = "Athens, GA, USA"}

@String{inst-UTK = "University of Tennessee, Knoxville"}
@String{inst-UTK:adr = "Knoxville, TN 37996, USA"}

@String{inst-UTK-CS = "Department of Computer Science,
                      University of Tennessee, Knoxville"}
@String{inst-UTK-CS:adr = "Knoxville, TN 37996, USA"}

%%% ====================================================================
%%% Journal abbreviations:
@String{j-ACM-COMM-COMP-ALGEBRA = "ACM Communications in Computer Algebra"}

@String{j-ACM-J-EXP-ALGORITHMICS = "ACM Journal of Experimental Algorithmics"}

@String{j-ADV-COMPUT-MATH = "Advances in computational mathematics"}

@String{j-ADV-WATER-RESOURCES = "Advances in water resources"}

@String{j-AIAA-ASME-ASCE-AHS-STRUCT-STRUCT-DYN-MAT-CONF =
    "AIAA/ASME/ASCE/AHS Structures, Structural Dynamics \& Materials
    Conference --- Collection of Technical Papers"}

@String{j-ALGORITHMICA = "Algorithmica"}

@String{j-ALGORITHMS-BASEL = "Algorithms ({Basel})"}

@String{j-APPL-MATH-COMP = "Applied Mathematics and Computation"}

@String{j-APPL-NUM-MATH = "Applied Numerical Mathematics: Transactions of IMACS"}

@String{j-AUSTRALIAN-COMP-SCI-COMM = "Australian Computer Science Communications"}

@String{j-BIOMETRICS = "Biometrics"}

@String{j-CACM = "Communications of the ACM"}

@String{j-CCPE = "Concurrency and Computation:
Prac\-tice and Experience"}

%%% Journal-name macros, continued (alphabetical by macro name, one
%%% definition per line).
@String{j-CGF = "Com{\-}pu{\-}ter Graphics Forum"}

@String{j-CHIN-J-COMPUTERS = "Chinese Journal of Computers"}

@String{j-COMP-ARCH-NEWS = "ACM SIGARCH Computer Architecture News"}

@String{j-COMP-ART-INTELL =
    "Computers and Artificial Intelligence = Vychislitel'nye mashiny i
    iskusstvennyi intellekt"}

@String{j-COMP-CHEM-ENG = "Computers \& Chemical Engineering"}

@String{j-COMP-ECONOMICS = "Computational Economics"}

@String{j-COMP-J = "The Computer Journal"}

@String{j-COMP-LANGS-SYS-STRUCT = "Computer Languages, Systems and Structures"}

@String{j-COMP-MECH = "Computational mechanics"}

@String{j-COMP-NET-AMSTERDAM = "Computer Networks (Amsterdam, Netherlands: 1999)"}

@String{j-COMP-PHYS-COMM = "Computer Physics Communications"}

@String{j-COMP-STAT = "Computational Statistics"}

@String{j-COMP-SURV = "ACM Computing Surveys"}

@String{j-COMP-SYS = "Computing systems: the journal of the USENIX Association"}

@String{j-COMPUT-MATH-APPL = "Computers and Mathematics with Applications"}

@String{j-COMPUT-METH-APPL-MECH-ENG =
    "Computer Methods in Applied Mechanics and Engineering"}

@String{j-COMPUT-PHYS = "Computers in Physics"}

@String{j-COMPUT-SCI-ENG = "Computing in Science and Engineering"}

@String{j-COMPUT-SYST-ENG =
    "Computing systems in engineering: an international journal"}

@String{j-COMPUTER = "Computer"}

@String{j-COMPUTERS-AND-GRAPHICS = "Computers and Graphics"}

@String{j-COMPUTING = "Computing"}

@String{j-CPE = "Concurrency: practice and experience"}

@String{j-CRAY-CHANNELS = "CRAY Channels"}

@String{j-DEC-TECH-J =
    "Digital Technical Journal of Digital Equipment Corporation"}

@String{j-DISCRETE-APPL-MATH = "Discrete Applied Mathematics"}

@String{j-ELECT-LETTERS = "Electronics Letters"}

@String{j-ENG-SCI-REP-KYUSHU = "Engineering Sciences Reports, Kyushu University"}

@String{j-FORTRAN-FORUM = "ACM Fortran Forum"}

@String{j-FRONTIERS-MASS-PAR-COMP-CONF-PROC =
    "Frontiers of Massively Parallel Computation --- Conference Proceedings"}

@String{j-FUT-GEN-COMP-SYS = "Future Generation Computer Systems"}

@String{j-FUTURE-INTERNET = "Future Internet"}

@String{j-HIGH-TECH-LETT = "High Technology Letters"}

@String{j-HUMAN-HEREDITY = "Human heredity"}

@String{j-IBM-JRD = "IBM Journal of Research and Development"}

@String{j-IBM-SYS-J = "IBM Systems Journal"}

@String{j-IEEE-COMPUT-ARCHIT-LETT = "IEEE Computer Architecture Letters"}

@String{j-IEEE-COMPUT-SCI-ENG = "IEEE Computational Science \& Engineering"}

@String{j-IEEE-CONCURR = "IEEE Concurrency"}

@String{j-IEEE-DISTRIB-SYST-ONLINE = "IEEE Distributed Systems Online"}

@String{j-IEEE-INT-CONF-ALG-ARCH-PAR-PROC =
    "IEEE International Conference on Algorithms and Architectures for
    Parallel Processing"}

@String{j-IEEE-J-SEL-AREAS-COMMUN =
    "IEEE Journal on Selected Areas in Communications"}

@String{j-IEEE-MICRO = "IEEE Micro"}

@String{j-IEEE-MICROW-GUIDED-WAVE-LETT = "IEEE Microwave and Guided Wave Letters"}

@String{j-IEEE-PAR-DIST-TECH =
    "IEEE parallel and distributed technology: systems and applications"}

@String{j-IEEE-TRANS-COMPUT = "IEEE Transactions on Computers"}

@String{j-IEEE-TRANS-PAR-DIST-SYS =
    "IEEE Transactions on Parallel and Distributed Systems"}

@String{j-IEEE-TRANS-SOFTW-ENG = "IEEE Transactions on Software Engineering"}

@String{j-IEEE-TRANS-VIS-COMPUT-GRAPH =
    "IEEE Transactions on Visualization and Computer Graphics"}

@String{j-IFIP-TRANS-A = "IFIP Transactions. A.
Computer Science and Technology"}

%%% Journal-name macros, continued (alphabetical by macro name, one
%%% definition per line).
@String{j-IJHPCA =
    "The International Journal of High Performance Computing Applications"}

@String{j-IJQC = "International Journal of Quantum Chemistry"}

@String{j-IJSA = "The International Journal of Supercomputer Applications"}

@String{j-IJSAHPC =
    "International Journal of Supercomputer Applications and High
    Performance Computing"}

@String{j-INFO-SOFTWARE-TECH = "Information and Software Technology"}

@String{j-INFORMATICA = "Informatica (Ljubljana, Slovenia)"}

@String{j-INT-J-COMPUT-APPL =
    "International Journal of Computers and Applications"}

@String{j-INT-J-COMPUT-SYST-SCI-ENG =
    "International Journal of Computer Systems Science and Engineering"}

@String{j-INT-J-HIGH-SPEED-COMPUTING =
    "International Journal of High Speed Computing"}

@String{j-INT-J-IMAGE-GRAPHICS =
    "International Journal of Image and Graphics (IJIG)"}

@String{j-INT-J-NUMER-METHODS-FLUIDS =
    "International Journal for Numerical Methods in Fluids"}

@String{j-INT-J-PAR-EMER-DIST-SYS =
    "International Journal of Parallel, Emergent and Distributed
    Systems: IJPEDS"}

@String{j-INT-J-PARALLEL-PROG = "International Journal of Parallel Programming"}

@String{j-INTEL-TECH-J = "Intel Technology Journal"}

@String{j-IT-IT = "Informationstechnik und technische Informatik: IT + TI"}

@String{j-J-APPL-ECONOMETRICS = "Journal of Applied Econometrics"}

@String{j-J-APPL-PHYS = "Journal of Applied Physics"}

@String{j-J-COMP-SCI-TECH = "Journal of computer science and technology"}

@String{j-J-COMP-SYS-SCI = "Journal of Computer and System Sciences"}

@String{j-J-COMPUT-APPL-MATH =
    "Journal of Computational and Applied Mathematics"}

@String{j-J-COMPUT-BIOL = "Journal of Computational Biology"}

@String{j-J-COMPUT-CHEM = "Journal of Computational Chemistry"}

@String{j-J-COMPUT-PHYS = "Journal of Computational Physics"}

@String{j-J-GRID-COMP = "Journal of Grid Computing"}

@String{j-J-MOL-STRUCT-THEOCHEM = "Journal of molecular structure. Theochem"}

@String{j-J-OPEN-RES-SOFT = "Journal of Open Research Software"}

@String{j-J-PAR-DIST-COMP = "Journal of Parallel and Distributed Computing"}

@String{j-J-PHYS-IV-COLLOQUE = "Journal de physique. IV, Colloque"}

@String{j-J-PROGRAM-LANG = "Journal of Programming Languages"}

@String{j-J-SCI-COMPUT = "Journal of Scientific Computing"}

@String{j-J-STAT-SOFT = "Journal of Statistical Software"}

@String{j-J-SUPERCOMPUTING = "The Journal of Supercomputing"}

@String{j-J-SYST-SOFTW = "The Journal of Systems and Software"}

@String{j-J-UCS = "J.UCS: Journal of Universal Computer Science"}

@String{j-JETC =
    "ACM Journal on Emerging Technologies in Computing Systems (JETC)"}

@String{j-JOHO-SHORI = "Joho-Shori (J. Information Processing Soc. Japan)"}

@String{j-LECT-NOTES-COMP-SCI = "Lecture Notes in Computer Science"}

@String{j-LINUX-J = "Linux Journal"}

@String{j-MICROCOMP-CIVIL-ENG = "Microcomputers in Civil Engineering"}

@String{j-MICROPROC-MICROPROG = "Microprocessing and Microprogramming"}

@String{j-MINI-MICRO-SYSTEMS = "Mini-Micro Systems"}

@String{j-NETWORK-SECURITY = "Network Security"}

@String{j-NEURAL-PAR-SCI-COMPUT = "Neural, Parallel and Scientific Computations"}

@String{j-NUCL-SCI-ENG = "Nuclear Science and Engineering"}

@String{j-NUCLEAR-SAFETY = "Nuclear safety"}

@String{j-NUMER-ALGORITHMS = "Numerical Algorithms"}

@String{j-OPER-SYS-REV = "Operating Systems Review"}

@String{j-PACMPL = "Proceedings of the ACM on Programming Languages (PACMPL)"}

@String{j-PARALLEL-ALGORITHMS-APPL = "Parallel Algorithms and Applications"}

@String{j-PARALLEL-COMPUTING = "Parallel Computing"}

@String{j-PARALLEL-DIST-COMP-PRACT =
    "Parallel and Distributed Computing Practices"}

@String{j-PARALLEL-PROCESS-LETT = "Parallel Processing Letters"}

@String{j-PARALLELOGRAM = "Parallelogram"}

@String{j-POMACS =
    "Proceedings of the ACM on Measurement and Analysis of Computing
    Systems (POMACS)"}

@String{j-PROC-INT-CONF-PAR-PROC = "Proceedings of the International Conference on Parallel
Processing"}

%%% Journal-name macros, continued (alphabetical by macro name, one
%%% definition per line).
@String{j-PROC-SPIE =
    "Proceedings of the SPIE --- The International Society for Optical
    Engineering"}

@String{j-PROC-SUPERCOMPUT = "Proceedings of the Supercomputing Conference"}

@String{j-PROC-VLDB-ENDOWMENT = "Proceedings of the VLDB Endowment"}

@String{j-PROGRAMMIROVANIE = "Programmirovanie"}

@String{j-QUEUE = "ACM Queue: Tomorrow's Computing Today"}

@String{j-R-JOURNAL = "The R Journal"}

@String{j-R-NEWS = "R News: the Newsletter of the R Project"}

@String{j-REAL-TIME-IMAGING = "Real-Time Imaging"}

@String{j-SCI-COMPUT-PROGRAM = "Science of Computer Programming"}

@String{j-SCI-PROG = "Scientific Programming"}

@String{j-SCPE = "Scalable Computing: Practice and Experience"}

@String{j-SIAM-J-OPT = "SIAM Journal on Optimization"}

@String{j-SIAM-J-SCI-COMP = "SIAM Journal on Scientific Computing"}

@String{j-SIAM-NEWS = "SIAM News"}

@String{j-SIGADA-LETTERS = "ACM SIGADA Ada Letters"}

@String{j-SIGCSE =
    "SIGCSE Bulletin (ACM Special Interest Group on Computer Science
    Education)"}

@String{j-SIGMETRICS = "ACM SIGMETRICS Performance Evaluation Review"}

@String{j-SIGMOD =
    "SIGMOD Record (ACM Special Interest Group on Management of Data)"}

@String{j-SIGNAL-PROCESS-IMAGE-COMMUN = "Signal Processing: Image Communication"}

@String{j-SIGPLAN = "ACM SIG{\-}PLAN Notices"}

@String{j-SIGSAM =
    "SIGSAM Bulletin (ACM Special Interest Group on Symbolic and
    Algebraic Manipulation)"}

@String{j-SIGSOFT = "ACM SIGSOFT Software Engineering Notes"}

@String{j-SIM-MODEL-PRACT-THEORY = "Simulation Modelling Practice and Theory"}

@String{j-SOFTWAREX = "SoftwareX"}

@String{j-SPE = "Soft{\-}ware\emdash Prac{\-}tice and Experience"}

@String{j-STAT-COMPUT = "Statistics and Computing"}

@String{j-SUPERCOMPUTER = "Supercomputer"}

@String{j-SUPERFRI = "Supercomputing Frontiers and Innovations"}

@String{j-TACO = "ACM Transactions on Architecture and Code Optimization"}

@String{j-TCBB =
    "IEEE/ACM Transactions on Computational Biology and Bioinformatics"}

@String{j-TECS = "ACM Transactions on Embedded Computing Systems"}

@String{j-TKDD = "ACM Transactions on Knowledge Discovery from Data (TKDD)"}

@String{j-TOCE = "ACM Transactions on Computing Education"}

@String{j-TOCL = "ACM Transactions on Computational Logic"}

@String{j-TOCS = "ACM Transactions on Computer Systems"}

@String{j-TODAES =
    "ACM Transactions on Design Automation of Electronic Systems"}

@String{j-TOG = "ACM Transactions on Graphics"}

@String{j-TOMACS = "ACM Transactions on Modeling and Computer Simulation"}

@String{j-TOMCCAP =
    "ACM Transactions on Multimedia Computing, Communications, and
    Applications"}

@String{j-TOMPECS =
    "ACM Transactions on Modeling and Performance Evaluation of
    Computing Systems (TOMPECS)"}

@String{j-TOMS = "ACM Transactions on Mathematical Software"}

@String{j-TOPC = "ACM Transactions on Parallel Computing (TOPC)"}

@String{j-TOPLAS = "ACM Transactions on Programming Languages and Systems"}

@String{j-TOSEM = "ACM Transactions on Software Engineering and Methodology"}

@String{j-TRANS-AM-NUCL-SOC = "Transactions of the American Nuclear Society"}

@String{j-TRANS-INFO-PROCESSING-SOC-JAPAN =
    "Transactions of the Information Processing Society of Japan"}

@String{j-TRETS =
    "ACM Transactions on Reconfigurable Technology and Systems (TRETS)"}

@String{j-TSAS = "ACM Transactions on Spatial Algorithms and Systems (TSAS)"}

@String{j-VLDB-J = "VLDB Journal: Very Large Data Bases"}

%%% ====================================================================
%%% Publisher abbreviations:
@String{pub-ACM = "ACM Press"}
@String{pub-ACM:adr = "New York, NY 10036, USA"}

@String{pub-AIP = "American Institute of Physics"}
@String{pub-AIP:adr = "Woodbury, NY, USA"}

@String{pub-ASME = "American Society Mech. Engineers"}
@String{pub-ASME:adr = "United Engineering Center, 345 E.
47th St., New York, NY 10017, USA"}

%%% Publisher macros, continued: each publisher name macro is followed
%%% by its :adr address macro.
@String{pub-AW = "Ad{\-d}i{\-s}on-Wes{\-l}ey"}
@String{pub-AW:adr = "Reading, MA, USA"}

@String{pub-BIRKHAUSER = "Birkh{\"a}user"}
@String{pub-BIRKHAUSER:adr = "Cambridge, MA, USA; Berlin, Germany; Basel,
                             Switzerland"}

@String{pub-CAMBRIDGE = "Cambridge University Press"}
@String{pub-CAMBRIDGE:adr = "Cambridge, UK"}

@String{pub-CHAPMAN-HALL = "Chapman and Hall, Ltd."}
@String{pub-CHAPMAN-HALL:adr = "London, UK"}

@String{pub-CHAPMAN-HALL-CRC = "Chapman and Hall/CRC"}
@String{pub-CHAPMAN-HALL-CRC:adr = "Boca Raton, FL, USA"}

@String{pub-CRC = "CRC Press"}
@String{pub-CRC:adr = "2000 N.W. Corporate Blvd., Boca Raton, FL
                      33431-9868, USA"}

@String{pub-ELS = "Elsevier"}
@String{pub-ELS:adr = "Amsterdam, The Netherlands"}

@String{pub-ELSAS = "Elsevier Applied Science"}
@String{pub-ELSAS:adr = "London, UK"}

@String{pub-IEEE = "IEEE Computer Society Press"}
@String{pub-IEEE:adr = "1109 Spring Street, Suite 300, Silver Spring, MD
                       20910, USA"}

%%% The ZIP+4 code needs a five-digit ZIP: 22009 is the Burke, VA
%%% post-office-box ZIP (the previous value, 2209-0558, had lost a digit).
@String{pub-IOS = "IOS Press"}
@String{pub-IOS:adr = "Postal Drawer 10558, Burke, VA 22009-0558, USA"}

@String{pub-KLUWER = "Kluwer Academic Publishers Group"}
@String{pub-KLUWER:adr = "Norwell, MA, USA, and Dordrecht, The Netherlands"}

@String{pub-MCGRAW-HILL = "Mc{\-}Graw-Hill"}
@String{pub-MCGRAW-HILL:adr = "New York, NY, USA"}

@String{pub-MIT = "MIT Press"}
@String{pub-MIT:adr = "Cambridge, MA, USA"}

@String{pub-MORGAN-KAUFMANN = "Morgan Kaufmann Publishers"}
@String{pub-MORGAN-KAUFMANN:adr = "Los Altos, CA 94022, USA"}
@String{pub-MORGAN-KAUFMANN:adrnew = "2929 Campus Drive, Suite 260, San
                                     Mateo, CA 94403, USA"}

@String{pub-NASA = "National Aeronautics and Space Administration"}
@String{pub-NASA:adr = "Washington, DC, USA"}

@String{pub-NH = "North-Hol{\-}land"}
@String{pub-NH:adr = "Amsterdam, The Netherlands"}

@String{pub-NTIS = "National Technical Information Service"}
@String{pub-NTIS:adr = "Washington, DC, USA"}

@String{pub-ORA = "O'Reilly \& {Associates, Inc.}"}
@String{pub-ORA:adr = "981
Chestnut Street, Newton, MA 02164, USA"}

%%% Publisher macros, continued: each publisher name macro is followed
%%% by its :adr address macro.
@String{pub-OXFORD = "Oxford University Press"}
@String{pub-OXFORD:adr = "Walton Street, Oxford OX2 6DP, UK"}

@String{pub-PHI = "Pren{\-}tice-Hall International"}
@String{pub-PHI:adr = "Englewood Cliffs, NJ 07632, USA"}

@String{pub-PLENUM = "Plenum Press"}
@String{pub-PLENUM:adr = "New York, NY, USA"}

@String{pub-SCRI = "Supercomputing Computations Research Institute,
                   Florida State University"}
@String{pub-SCRI:adr = "Tallahassee, FL, USA"}

@String{pub-SIAM = "Society for Industrial and Applied Mathematics"}
@String{pub-SIAM:adr = "Philadelphia, PA, USA"}

@String{pub-SPE = "Society of Petroleum Engineers"}
@String{pub-SPE:adr = "Richardson, TX, USA"}

@String{pub-SPIE = "Society of Photo-optical Instrumentation Engineers
                   (SPIE)"}
@String{pub-SPIE:adr = "Bellingham, WA, USA"}

@String{pub-SUN-MICROSYSTEMS-PRESS = "Sun Microsystems Press"}
@String{pub-SUN-MICROSYSTEMS-PRESS:adr = "Palo Alto, CA, USA"}

@String{pub-SV = "Spring{\-}er-Ver{\-}lag"}
@String{pub-SV:adr = "Berlin, Germany~/ Heidelberg, Germany~/ London,
                     UK~/ etc."}

@String{pub-USENIX = "USENIX"}
@String{pub-USENIX:adr = "Berkeley, CA, USA"}

@String{pub-WILEY = "Wiley"}
@String{pub-WILEY:adr = "New York, NY, USA"}

@String{pub-WORLD-SCI = "World Scientific Publishing Co. Pte. Ltd."}
@String{pub-WORLD-SCI:adr = "P. O. Box 128, Farrer Road, Singapore 9128"}

%%% ====================================================================
%%% Series abbreviations:
@String{ser-LNAI = "Lecture Notes in Artificial Intelligence"}

@String{ser-LNCS = "Lecture Notes in Computer Science"}

@String{ser-LNCSE = "Lecture Notes in Computational Science and
                    Engineering"}

%%% ====================================================================
%%% Bibliography entries, sorted by year and then by citation label,
%%% with `bibsort -byyear':
@Article{Abrossimov:1989:GVM,
  author =       "V. Abrossimov and M. Rozier and M. Shapiro",
  title =        "Generic virtual memory management for operating system
                 kernels",
  journal =      j-OPER-SYS-REV,
  volume =       "23",
  number =       "5",
  pages =        "123--136",
  year =         "1989",
  CODEN =        "OSRED8",
  ISSN =         "0163-5980 (print), 1943-586X (electronic)",
  ISSN-L =       "0163-5980",
  bibdate =      "Sun Dec 22 10:16:35 MST 1996",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Chorus Systemes, Saint-Quentin-en-Yvelines, France",
  classification = "C6120 (File organisation); C6150J (Operating
                 systems)",
  fjournal =     "Operating Systems Review",
  keywords =     "Chorus Nucleus; Consistent cache; Data caching;
                 Deferred copying; Explicit I/O; Generic Memory
                 management Interface; History object technique; Mapped
                 objects; Operating system kernel; Paged architectures;
                 Paged Virtual Memory manager; PVM; Real memory; Unix",
  thesaurus =    "Buffer storage; Operating systems [computers]; Virtual
                 storage",
}

@InProceedings{Poplawski:1989:MPP,
  author =       "D. A. Poplawski and S. Pahwa and J. M. Francioni",
  title =        "Models of parallel program behavior",
  crossref =     "Anonymous:1989:PFC",
  pages =        "857--860 (vol. 2)",
  year =         "1989",
  bibdate =      "Sun Dec 22 10:16:53 MST 1996",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Dept. of Comput. Sci., Michigan Technol. Univ.,
                 Houghton, MI, USA",
  classification = "C4240 (Programming and algorithm theory); C6110
                 (Systems analysis and programming); C6120 (File
                 organisation)",
  keywords =     "Distributed memory MIMD; Hypercube programs; Parallel
                 program behavior; Parallel virtual memory; PVM",
  thesaurus =    "Hypercube networks; Parallel programming; Virtual
                 storage",
}

@InProceedings{Feeley:1990:PVM,
  author =       "Marc Feeley and James S.
Miller",
  booktitle =    "{Proceedings of the 1990 ACM Conference on LISP and
                 Functional Programming, Nice}",
  title =        "A parallel virtual machine for efficient {Scheme}
                 compilation",
  crossref =     "ACM:1990:PAC",
  publisher =    pub-ACM,
  address =      pub-ACM:adr,
  bookpages =    "????",
  pages =        "119--130",
  month =        jun,
  year =         "1990",
  bibdate =      "Wed Jan 24 04:51:56 MST 2001",
  bibsource =    "http://dblp.uni-trier.de/db/conf/lfp/lfp1990.html#FeeleyM90;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.acm.org/pubs/citations/proceedings/lfp/91556/p119-feeley/",
  abstract =     "Programs compiled by Gambit, our Scheme compiler,
                 achieve performance as much as twice that of the
                 fastest available Scheme compilers. Gambit is easily
                 ported, while retaining its high performance, through
                 the use of a simple virtual machine (PVM). PVM allows a
                 wide variety of machine-independent optimizations and
                 it supports parallel computation based on the future
                 construct. PVM conveys high-level information
                 bidirectionally between the machine-independent front
                 end of the compiler and the machine-dependent back end,
                 making it easy to implement a number of common back end
                 optimizations that are difficult to achieve for other
                 virtual machines. PVM is similar to many real computer
                 architectures and has an option to efficiently gather
                 dynamic measurements of virtual machine usage. These
                 measurements can be used in performance prediction for
                 ports to other architectures as well as design
                 decisions related to proposed optimizations and object
                 representations.",
  acknowledgement = ack-nhfb,
  affiliation =  "Brandeis Univ., Waltham, MA, USA",
  classification = "C6150C (Compilers, interpreters and other
                 processors)",
  conflocation = "Nice, France; 27--29 June 1990",
  corpsource =   "Brandeis Univ., Waltham, MA, USA",
  keywords =     "Gambit; Lisp; machine-independent; Machine-independent
                 front end; machine-independent front end;
                 Machine-independent optimizations; object; Object
                 representations; optimizations; parallel processing;
                 Parallel virtual machine; parallel virtual machine;
                 portability; program compilers; PVM portability;
                 representations; Scheme compiler; simple virtual
                 machine; Simple virtual machine; software; virtual
                 machines",
  oldlabel =     "FeeleyM90",
  sponsororg =   "ACM",
  thesaurus =    "Parallel processing; Program compilers; Software
                 portability; Virtual machines",
  treatment =    "P Practical",
  XMLdata =      "ftp://ftp.informatik.uni-trier.de/pub/users/Ley/bib/records.tar.gz#conf/lfp/FeeleyM90",
}

@TechReport{Sunderam:1990:PFPa,
  author =       "V. S. Sunderam",
  title =        "{PVM}: a Framework for Parallel Distributed Computing",
  number =       "ORNL/TM-11375",
  institution =  "Dept. of Math and Computer Science, " # inst-EMORY,
  address =      inst-EMORY:adr,
  month =        feb,
  year =         "1990",
  bibsource =    "Distributed/dist.sys.1.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note =         "See also \cite{Sunderam:1990:PFPb}.",
  comment =      "Good overview of PVM, though now a little out of date.
                 Supports dynamic, location-transparent, process
                 initiation, typed message passing and shared memory,
                 broadcast and distributed synchronization, and
                 heterogeneity in the form of language- and
                 machine-independence, type conversion, and multiple
                 executables for each component. Seems to be heavily
                 dependent on broadcast. Shared memory is somewhat
                 limited. See also beguelin:concsuper.
[David.Kotz at Dartmouth.edu]",
  keyword =      "heterogeneous computing, distributed computing,
                 network parallel computing",
}

@Article{Sunderam:1990:PFPb,
  author =       "V. S. Sunderam",
  title =        "{PVM}: a Framework for Parallel Distributed Computing",
  journal =      j-CPE,
  volume =       "2",
  number =       "4",
  pages =        "315--339",
  month =        dec,
  year =         "1990",
  CODEN =        "CPEXEI",
  ISSN =         "1040-3108",
  ISSN-L =       "1040-3108",
  bibdate =      "Tue Sep 7 05:40:19 MDT 1999",
  bibsource =    "Distributed/clusters.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 Misc/IMMD_IV.bib",
  note =         "See also the earlier technical report
                 \cite{Sunderam:1990:PFPa}.",
  acknowledgement = ack-nhfb,
  affiliation =  "Dept. of Math. and Comput. Sci., Emory Univ., Atlanta,
                 GA, USA",
  classification = "C6115 (Programming support)",
  corpsource =   "Dept. of Math. and Comput. Sci., Emory Univ., Atlanta,
                 GA, USA",
  fjournal =     "Concurrency, practice and experience",
  keywords =     "algorithms; Algorithms; concurrent; Concurrent;
                 conditional execution; Conditional execution;
                 distributed processing; environment; environments;
                 error detection; Error detection; interface; Interface;
                 parallel distributed computing; Parallel distributed
                 computing; parallel programming; programming;
                 Programming environment; PVM system; sequential;
                 Sequential; virtual computing; Virtual computing
                 environment",
  pubcountry =   "UK",
  thesaurus =    "Distributed processing; Parallel programming;
                 Programming environments",
  treatment =    "P Practical",
}

%%% In the next entry the issue range and the conference date range
%%% are written with the en-dash form (--), as ranges are elsewhere in
%%% this file; a single hyphen would be typeset as a hyphen.
@Article{Balou:1991:DIV,
  author =       "A. T. Balou and A. N. Refenes",
  title =        "The design and implementation of {VOOM}: a parallel
                 virtual object oriented machine",
  journal =      j-MICROPROC-MICROPROG,
  volume =       "32",
  number =       "1--5",
  pages =        "289--296",
  month =        aug,
  year =         "1991",
  CODEN =        "MMICDT",
  ISSN =         "0165-6074 (print), 1878-7061 (electronic)",
  ISSN-L =       "0165-6074",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C5220 (Computer architecture); C6110 (Systems
                 analysis and programming); C7430 (Computer
                 engineering)",
  conflocation = "Vienna, Austria; 2--5 Sept. 1991",
  conftitle =    "17th EUROMICRO Symposium on Microprocessing and
                 Microprogramming. Hardware and Software Design
                 Automation",
  corpsource =   "Dept. of Comput. Sci., Univ. Coll. London, UK",
  fjournal =     "Microprocessing and Microprogramming",
  keywords =     "design; execution unit; implementation; machine;
                 machines; memory management unit; memory recycling;
                 object management; object-oriented model;
                 object-oriented programming; packet-switching network;
                 parallel architecture; parallel architectures; parallel
                 virtual object oriented; pre-fetch unit; virtual",
  pubcountry =   "Netherlands",
  treatment =    "P Practical",
}

@InProceedings{Beguelin:1991:GDT,
  author =       "Adam Beguelin and Jack J. Dongarra and A. Geist and
                 Robert Manchek and V. S. Sunderam",
  title =        "Graphical Development Tools for Network-Based
                 Concurrent Supercomputing",
  crossref =     "IEEE:1991:PSA",
  pages =        "435--444",
  year =         "1991",
  bibdate =      "Sun Dec 22 10:17:16 MST 1996",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Distributed/clusters.bib;
                 https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Oak Ridge Nat Lab., TN, USA",
  classification = "C6110P (Parallel programming); C6115 (Programming
                 support); C6180G (Graphical user interfaces)",
  comment =      "Clusters of workstations solving supercomputing
                 problems.
This is a graphical front-end to PVM, that allows the user to specify a set of subroutines, their parameters and output values, and the dependencies between them. It can compile the parts on multiple machines. At run time it chooses where to execute each module, and when, based on the dependencies and on a user-supplied cost matrix showing the cost of running each module in each place. See also beguelin:hence. [David.Kotz at Dartmouth.edu]", keyword = "network supercomputing, distributed computing", keywords = "Application program; Graphical development tools; HeNCE; Heterogeneous network computing environment; Integrated graphical tools; Network-based concurrent supercomputing; Parallel programs; Parallel Virtual Machine; Process management and communication; PVM; Software package; X-window-based software environment", thesaurus = "Graphical user interfaces; Parallel programming; Programming environments", } @TechReport{Beguelin:1991:UGP, author = "A. Beguelin and J. Dongarra and A. Geist and R. Manchek and V. Sunderam", title = "A User's guide to {PVM}: Parallel virtual machine", type = "Technical Report", number = "ORNL/TM-11826", institution = "Mathematical Sciences Section, Oak Ridge National Laboratory", address = inst-ORNL:adr, month = sep, year = "1991", bibsource = "Distributed/clusters.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; Parallel/par.lin.alg.bib; Theory/Matrix.bib", keywords = "prll, operating system", } @InProceedings{Benzoni:1991:MFR, author = "A. Benzoni and V. S. Sunderam and R. 
van de Guijn",
  title = "Matrix factorization on a {RISC} workstation network",
  crossref = "Durand:1991:HPC",
  pages = "207--218",
  year = "1991",
  bibdate = "Sun Dec 22 10:17:16 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "IBM ECSEC, Roma, Italy",
  classification = "C4140 (Linear algebra); C5220 (Computer architecture);
    C5470 (Performance evaluation and testing); C5620L (Local area
    networks)",
  keywords = "20 To 60 MFLOPS; Concurrent process management; Dense matrix;
    Distributed memory architecture; Distributed programming
    environment; Ethernet; Heterogeneous distributed computing
    environment; High-speed network; Independent processing units; LU
    factorization; Numerically intensive applications; Optical fiber
    link; PVM; RISC System/6000 workstations; RISC workstation network;
    Synchronization; Token Ring local area network",
  numericalindex = "Computer speed 2.0E+07 to 6.0E+07 FLOPS",
  thesaurus = "Distributed processing; Local area networks; Matrix algebra;
    Optical links; Performance evaluation",
}

@Manual{Dongarra:1991:UGP,
  author = "Jack Dongarra and others",
  title = "A Users' Guide to {PVM} Parallel Virtual Machine",
  organization = inst-ORNL,
  address = inst-ORNL:adr,
  month = Jul,
  year = "1991",
  bibsource = "Distributed/Dist.Sys.bib;
    https://www.math.utah.edu/pub/tex/bib/pvm.bib",
}

@TechReport{Geist:1991:ENB,
  author = "G. A. Geist and V. S. Sunderam",
  title = "Experiences with network based concurrent computing on the
    {PVM} system",
  number = "ORNL/TM-11760",
  institution = inst-ORNL,
  address = inst-ORNL:adr,
  month = jan,
  year = "1991",
  bibsource = "Distributed/clusters.bib;
    https://www.math.utah.edu/pub/tex/bib/pvm.bib",
}

@InProceedings{Geist:1991:PSS,
  author = "G. A. Geist and V. S. Sunderam",
  title = "The {PVM} System: {Supercomputer} Level Concurrent Computation
    on a Heterogeneous Network of Workstations",
  crossref = "Stout:1991:SDM",
  pages = "258--261",
  year = "1991",
  bibsource = "Distributed/dist.sys.1.bib;
    https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  comment = "A more up-to-date, but shorter, overview of PVM and its
    performance than sunderam:pvm. Good performance on networks of IBM
    RS/6000s. [David.Kotz at Dartmouth.edu]",
  keyword = "distributed heterogeneous computing",
}

@Article{Meleshchuk:1991:IPP,
  author = "S. B. Meleshchuk and A. N. Nedumov",
  title = "Implementation of a protocol for parallel database access with
    virtual machine communications facilities",
  journal = j-PROGRAMMIROVANIE,
  volume = "17",
  number = "1",
  pages = "35--42",
  month = jan # "\slash " # feb,
  year = "1991",
  CODEN = "PCSODA",
  ISSN = "0132-3474, 0361-7688",
  bibdate = "Wed Apr 16 06:39:19 MDT 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "English translation in Programming and Computer Software, vol.
    17, no. 1, pp. 27--32, November 1991.",
  acknowledgement = ack-nhfb,
  classification = "C6150J (Operating systems)",
  corpsource = "Leningrad Techn. State Univ., USSR",
  fjournal = "Programmirovanie",
  keywords = "COMMIT protocol; concurrency control; deadlock; electronic
    mail; interrupts; IUCV mail facility; machines; parallel; parallel
    database access; processing; protocol; protocols; virtual; virtual
    machines",
  pubcountry = "USSR",
  treatment = "P Practical",
}

@InProceedings{Nagaraj:1991:MHL,
  author = "U. Nagaraj and U. S. Shukla",
  title = "{MK}: a high level interface for message passing",
  crossref = "Bhavsar:1991:SSJ",
  pages = "493--502",
  year = "1991",
  bibdate = "Sat Apr 19 16:34:54 MDT 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C6150J (Operating systems)",
  corpsource = "Centre for Dev. of Adv. Comput., Bangalore, India",
  keywords = "communication interface; high level interface;
    interconnection network technology; message passing multicomputer;
    MK; network operating systems; programming environment; software
    interfaces; transputer network",
  treatment = "P Practical",
}

@Article{Saltz:1991:MRT,
  author = "J. Saltz and H. Berryman and J. Wu",
  title = "Multiprocessors and Run-time Compilation",
  journal = j-CPE,
  volume = "3",
  number = "6",
  pages = "573--592",
  month = dec,
  year = "1991",
  CODEN = "CPEXEI",
  ISSN = "1040-3108",
  ISSN-L = "1040-3108",
  bibdate = "Tue Sep 7 05:40:19 MDT 1999",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Concurrency, practice and experience",
}

@MastersThesis{Al-Salman:1992:DIP,
  author = "Abdulmalik Salman Al-Salman",
  title = "Design and implementation of a profiler for the Parallel
    Virtual Machine ({PVM}) system",
  type = "M.S. thesis",
  school = inst-UGA,
  address = inst-UGA:adr,
  pages = "vi + 51",
  year = "1992",
  bibdate = "Mon Jan 15 16:37:21 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "Directed by Steven C. Cater.",
  acknowledgement = ack-nhfb,
}

@InProceedings{Alfano:1992:DNA,
  author = "M. Alfano and G. {Lo Re}",
  title = "Distributing numerical algorithms: some experiences with
    network computing system ({NCS}) and parallel virtual machine
    ({PVM})",
  crossref = "SCRI:1992:PWC",
  year = "1992",
  bibsource = "Distributed/clusters.bib;
    https://www.math.utah.edu/pub/tex/bib/pvm.bib",
}

@InProceedings{Beguelin:1992:HGD,
  author = "A. Beguelin and J. Dongarra and A. Geist and R. Manchek and
    K. Moore and R. Wade and V. Sunderam",
  title = "{HeNCE}: graphical development tools for network-based
    concurrent computing",
  crossref = "IEEE:1992:PSH",
  pages = "129--136",
  year = "1992",
  bibdate = "Sun Dec 22 10:17:40 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Oak Ridge Nat.
Lab., TN, USA",
  classification = "C6110P (Parallel programming); C6115 (Programming
    support); C6130B (Graphics techniques); C6150C (Compilers,
    interpreters and other processors); C6180G (Graphical user
    interfaces)",
  keywords = "Distributed virtual computer; Graphical development tools;
    Graphical interface; Graphical parallel programming environment;
    HeNCE; Heterogeneous machines; Heterogeneous network computing
    environment; Network-based concurrent computing; Program compiler;
    Program debugging; PVM; Unix workstation; X Window",
  thesaurus = "Graphical user interfaces; Parallel programming; Program
    compilers; Program debugging; Programming environments; Software
    tools",
}

@Article{Beguelin:1992:PHT,
  author = "A. Beguelin and J. Dongarra and A. Geist and R. Manchek and
    V. Sunderam",
  title = "{PVM} and {HeNCE}: traversing the parallel environment",
  journal = j-CRAY-CHANNELS,
  volume = "14",
  number = "4",
  pages = "22--25",
  month = "Fall",
  year = "1992",
  CODEN = "CRCHE8",
  bibdate = "Wed Apr 16 06:39:19 MDT 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Carnegie-Mellon Univ., Pittsburgh, PA, USA",
  classification = "C5440 (Multiprocessor systems and techniques); C6110P
    (Parallel programming); C6150N (Distributed systems); C7430
    (Computer engineering)",
  corpsource = "Carnegie-Mellon Univ., Pittsburgh, PA, USA",
  fjournal = "CRAY Channels",
  keywords = "cost-effective use; Cost-effective use; Cray Research MPP
    systems; diverse architectures; Diverse architectures; diverse
    computer systems; Diverse computer systems; HeNCE; Heterogeneous
    Network Computing Environment; heterogeneous networks;
    Heterogeneous networks; Machine; network operating systems;
    networked resources; Networked resources; packages; parallel;
    parallel machines; Parallel Virtual; Parallel Virtual Machine;
    portability; Portability; programming; PVM; software; software
    packages; Software packages; virtual machines",
  thesaurus = "Network operating systems; Parallel machines; Parallel
    programming; Software packages; Virtual machines",
  treatment = "P Practical; R Product Review",
}

@InProceedings{Beguelin:1992:SCG,
  author = "A. Beguelin and J. Dongarra and A. Geist and R. Manchek and
    V. Sunderam",
  title = "Solving computational grand challenges using a network of
    heterogeneous supercomputers",
  crossref = "Dongarra:1992:PFS",
  pages = "596--601",
  year = "1992",
  bibdate = "Sun Dec 22 10:17:40 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Oak Ridge Nat. Lab., Tennessee Univ., Knoxville, TN, USA",
  classification = "C5440 (Multiprocessor systems and techniques); C5620W
    (Other networks); C6110P (Parallel programming); C6115 (Programming
    support); C7430 (Computer engineering)",
  keywords = "Computational grand challenges; Cray XMP; Flexibility; High
    speed network; Intel iPSC/860; Network of heterogeneous
    supercomputers; Parallel virtual machine; Thinking Machines CM2;
    Virtual computer",
  thesaurus = "Parallel processing; Parallel programming; Programming
    environments; Virtual machines; Wide area networks",
}

@TechReport{Beguelin:1992:XTM,
  author = "Adam Louis Beguelin",
  title = "Xab: a tool for monitoring {PVM} programs",
  institution = inst-SCS-CMU,
  address = inst-SCS-CMU:adr,
  day = "5",
  month = jun,
  year = "1992",
  bibsource = "Distributed/clusters.bib;
    https://www.math.utah.edu/pub/tex/bib/pvm.bib",
}

@InProceedings{Benzoni:1992:CLF,
  author = "A. Benzoni and G. Richelli and V. S. Sunderam",
  title = "Concurrent {LU} factorization on workstation networks",
  crossref = "Evans:1992:PCP",
  pages = "159--166",
  year = "1992",
  bibdate = "Sun Dec 22 10:17:16 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "IBM ECSEC, Roma, Italy",
  classification = "B0290H (Linear algebra); B6210L (Computer
    communications); B6260 (Optical links and equipment); C4140 (Linear
    algebra); C4240P (Parallel programming and algorithm theory);
    C5620L (Local area networks)",
  keywords = "6 MByte/s; Concurrent LU factorization; Dense matrix;
    Ethernet network; Fiber optic links; IBM RISC System/6000
    workstations; Optical fiber links; PVM software system; Workstation
    networks",
  numericalindex = "Byte rate 6.0E+06 Byte/s",
  thesaurus = "Local area networks; Matrix algebra; Optical links;
    Parallel algorithms; Workstations",
}

@TechReport{Dongarra:1992:PUL,
  author = "Jack J. Dongarra and Rolf Hempel and Anthony J. G. Hey and
    David W. Walker",
  title = "A Proposal for a User-Level Message-Passing Interface in a
    Distributed Memory Environment",
  type = "Technical Report",
  number = "TM-12231",
  institution = inst-ORNL,
  address = inst-ORNL:adr,
  month = oct,
  year = "1992",
  bibdate = "Tue Feb 26 10:10:44 2002",
  bibsource = "ftp://ftp.ira.uka.de/pub/bibliography/Parallel/Par.Arch.Indep.bib;
    https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
    https://www.math.utah.edu/pub/tex/bib/pvm.bib",
}

@InProceedings{Duval:1992:TPP,
  author = "D. Duval",
  title = "Trends in parallel programming models for high performance
    computers",
  crossref = "Ferenczi:1992:AHW",
  pages = "33",
  year = "1992",
  bibdate = "Sun Dec 22 10:17:40 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Parallel Comput. Div., TELMAT Inf., Soultz, France",
  classification = "C4240P (Parallel programming and algorithm theory);
    C6110P (Parallel programming)",
  keywords = "CS-Tools; F90; Heterogeneous scalable networks; High
    Performance Fortran; Massively parallel machines; Neural
    coprocessor; Parallel programming models; PARMACS; PVM; Scientific
    applications; SHAPES ASI; SPMD; Superscalar; Transputers; Vector
    facilities",
  thesaurus = "Parallel programming; Programming theory; Software
    engineering",
}

@InProceedings{Eppstein:1992:PGC,
  author = "Margaret J. Eppstein and Joseph F. Guarnaccia and David Emery
    Dougherty and Robert S. Kerr",
  title = "Parallel groundwater computations using {PVM}",
  crossref = "Russell:1992:CMW",
  pages = "713--720",
  year = "1992",
  bibdate = "Mon Jan 15 15:32:53 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  annote = "Caption title. Published in Computational methods in water
    resources IX, vol. 1, Numerical methods in water resources.
    EPA/600/A-92/157 PB92-206572. Microfiche. Springfield, VA: National
    Technical Information Service, [1992]. 1 microfiche: negative.",
  keywords = "Groundwater flow --- Computer programs",
}

@Book{Freeman:1992:PNA,
  author = "T. L. (Len) Freeman and C. (Christopher) Phillips",
  title = "Parallel numerical algorithms",
  publisher = pub-PHI,
  address = pub-PHI:adr,
  pages = "xii + 315",
  year = "1992",
  ISBN = "0-13-651597-5",
  ISBN-13 = "978-0-13-651597-5",
  LCCN = "QA76.9.A43 F74 1992",
  bibdate = "Mon Oct 07 09:13:23 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "Chapter 5 discusses HPF and PVM.",
  price = "US\$40.00",
  series = "Prentice Hall International Series in Computer Science",
  acknowledgement = ack-nhfb,
}

@Article{Geist:1992:NBC,
  author = "G. A. Geist and V. S.
Sunderam",
  title = "Network-based Concurrent Computing on the {PVM} System",
  journal = j-CPE,
  volume = "4",
  number = "4",
  pages = "293--311",
  xxpages = "293--312",
  month = jun,
  year = "1992",
  CODEN = "CPEXEI",
  ISSN = "1040-3108",
  ISSN-L = "1040-3108",
  bibdate = "Wed Apr 16 06:39:19 MDT 1997",
  bibsource = "Distributed/clusters.bib;
    https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Math. Sci. Sect., Oak Ridge Nat. Lab., TN, USA",
  classification = "C5440 (Multiprocessor systems and techniques); C6180G
    (Graphical user interfaces); C7430 (Computer engineering)",
  corpsource = "Math. Sci. Sect., Oak Ridge Nat. Lab., TN, USA",
  fjournal = "Concurrency, practice and experience",
  keywords = "Computational resource; computational resource; Concurrent
    computing environment; concurrent computing environment; coupled
    networks; graphical interface; graphical user interfaces;
    interactive; Interactive graphical interface; loosely; Loosely
    coupled networks; machines; Multiprocessing; multiprocessing;
    parallel processing; Parallel Virtual Machine; Performance;
    performance; Porting; porting; PVM system; Software package;
    software package; virtual",
  pubcountry = "UK",
  thesaurus = "Graphical user interfaces; Parallel processing; Virtual
    machines",
  treatment = "P Practical",
}

%%% Geist:1992:NBC: `pages' holds one plain range so that styles and
%%% tools can parse it; the variant end page that used to be embedded
%%% in that field is preserved in the ignored field `xxpages'.

@TechReport{Gropp:1992:TIM,
  author = "Bill Gropp and Ewing Lusk",
  title = "A test implementation of the {MPI} draft message-passing
    standard",
  institution = inst-ANL-mcs,
  address = inst-ANL:adr,
  year = "1992",
  bibsource = "Distributed/clusters.bib;
    https://www.math.utah.edu/pub/tex/bib/pvm.bib",
}

@InProceedings{Leon:1992:FP,
  author = "Juan Leon and Allan L. Fisher and Peter Steenkiste",
  title = "Fail-safe {PVM}",
  crossref = "SCRI:1992:PWC",
  year = "1992",
  bibsource = "Distributed/clusters.bib;
    https://www.math.utah.edu/pub/tex/bib/pvm.bib",
}

@Article{Majumdar:1992:PPC,
  author = "A. Majumdar and W. R.
Martin",
  title = "Parallel preconditioned conjugate gradient algorithm applied to
    neutron diffusion problem",
  journal = j-TRANS-AM-NUCL-SOC,
  volume = "65",
  pages = "209--210",
  year = "1992",
  CODEN = "TANSAO",
  ISSN = "0003-018X",
  bibdate = "Sun Dec 22 10:17:16 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Michigan Univ., Ann Arbor, MI, USA",
  classification = "A0260 (Numerical approximation and analysis); A2820H
    (Neutron diffusion); A2841C (Computer codes); C4130 (Interpolation
    and function approximation); C4240P (Parallel programming and
    algorithm theory); C7470 (Nuclear engineering)",
  fjournal = "Transactions of the American Nuclear Society",
  keywords = "BBN TC2000; Distributed workstation; IBM RS6000; Iterative
    method; Linear system; Neutron diffusion; Parallel PCG algorithm;
    Parallel virtual machine; Parallelization software; Preconditioned
    conjugate gradient; Shared memory machine",
  thesaurus = "Iterative methods; Neutron diffusion; Nuclear engineering
    computing; Parallel algorithms",
}

@InProceedings{McRae:1992:VC,
  author = "S. J. McRae",
  title = "{VM} communications",
  crossref = "Anonymous:1992:PSE",
  pages = "439--453",
  year = "1992",
  bibdate = "Sun Dec 22 10:17:40 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Soft-Switch Ltd., Reading, UK",
  classification = "C6150J (Operating systems); C6155 (Computer
    communications software)",
  keywords = "3270 Protocols; APPC; Business needs; Client/server
    communications; Communication offerings; Communications
    infra-structure; IBM host system; LANRES; MVS; OSI connectivity;
    PVM; RSCS; SAA communications strategy; SNA connectivity; TCP/IP;
    TCP/IP connectivity; VM; VM/ESA; X.25 communications",
  thesaurus = "Computer communications software; Operating systems
    [computers]",
}

@InProceedings{Otto:1992:MAP,
  author = "S. W. Otto and M. Wolfe",
  title = "The {MetaMP} approach to parallel programming",
  crossref = "Siegel:1992:FFS",
  pages = "562--565",
  year = "1992",
  bibdate = "Sun Dec 22 10:17:40 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Oregon Graduate Inst., Beaverton, OR, USA",
  classification = "C6110P (Parallel programming); C6140D (High level
    languages)",
  keywords = "MetaMP; Parallel programming",
  thesaurus = "High level languages; Parallel programming",
}

@InProceedings{Shen:1992:VTD,
  author = "S. Shen and L. Kleinrock",
  title = "The virtual-time data-parallel machine",
  crossref = "Siegel:1992:FSF",
  pages = "46--53",
  year = "1992",
  bibdate = "Wed Apr 16 06:39:19 MDT 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C5440 (Multiprocessor systems and techniques); C6110P
    (Parallel programming)",
  conflocation = "McLean, VA, USA; 19-21 Oct. 1992",
  corpsource = "Dept. of Comput. Sci., California Univ., Los Angeles, CA,
    USA",
  keywords = "asynchronous execution; computation-intensive data-parallel;
    FIFO priory cache; parallel machines; parallel programming;
    processing element; programs; SIMD; single instruction multiple
    data; virtual-time data-parallel machine",
  sponsororg = "IEEE; NASA",
  treatment = "P Practical",
}

@InProceedings{Sunderam:1992:CCP,
  author = "Vaidy Sunderam",
  title = "Concurrent Computing with {PVM}",
  crossref = "SCRI:1992:PWC",
  year = "1992",
  bibsource = "Distributed/clusters.bib;
    https://www.math.utah.edu/pub/tex/bib/pvm.bib",
}

@InProceedings{Wolbers:1992:SPP,
  author = "S. Wolbers",
  title = "Software for parallel processing applications",
  crossref = "Verkerk:1992:PIC",
  pages = "111--116",
  year = "1992",
  bibdate = "Sun Dec 22 10:18:08 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Fermilab, Batavia, IL, USA",
  classification = "A2980 (Nuclear information processing); C6110P
    (Parallel programming); C7320 (Physics and Chemistry)",
  keywords = "ACPMAPS; CANOPY; Cooperative processes software; High-energy
    physics; Lattice QCD; Monte Carlo generation; Offline event
    reconstruction; Parallel processing; Tightly-coupled machines;
    Workstation clusters",
  thesaurus = "Monte Carlo methods; Parallel programming; Physics
    computing",
}

@Article{Almasi:1993:PDS,
  author = "G. S. Almasi and T. McLuckie and J. Bell and A. Gordon",
  title = "Parallel distributed seismic migration",
  journal = j-CPE,
  volume = "5",
  number = "2",
  pages = "105--131",
  month = apr,
  year = "1993",
  CODEN = "CPEXEI",
  ISSN = "1040-3108",
  ISSN-L = "1040-3108",
  bibdate = "Sun Dec 22 10:17:40 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "IBM Thomas J. Watson Res. Center, Yorktown Heights, NY,
    USA",
  classification = "A9130 (Seismology); C5440 (Multiprocessor systems and
    techniques); C7340 (Geophysics)",
  fjournal = "Concurrency, practice and experience",
  keywords = "15 MFLOPS; Ethernet; IBM RISC/6000 workstations; Linda;
    Parallel distributed seismic migration; Performance; Programming
    models; PVM; Remote procedure calls; Token ring",
  numericalindex = "Computer speed 1.5E+07 FLOPS",
  pubcountry = "UK",
  thesaurus = "Geophysics computing; Parallel processing; Seismology",
}

@Article{Altevogt:1993:PTD,
  author = "P. Altevogt and A.
Linke",
  title = "Parallelization of the two-dimensional {Ising} model on a
    cluster of {IBM RISC System\slash 6000} workstations",
  journal = j-PARALLEL-COMPUTING,
  volume = "19",
  number = "9",
  pages = "1041--1052",
  month = sep,
  year = "1993",
  CODEN = "PACOEJ",
  ISSN = "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L = "0167-8191",
  bibdate = "Sun Dec 22 10:17:40 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Sci. Center, IBM, Heidelberg, Germany",
  classification = "A0550 (Lattice theory and statistics; Ising problems);
    C5220P (Parallel architecture); C7320 (Physics and Chemistry)",
  fjournal = "Parallel Computing",
  journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
  keywords = "IBM RISC System/6000 workstations; Metropolis algorithm;
    Multispin coding; NSC DX Router; PVM programming environment; Token
    ring; Two-dimensional Ising model",
  pubcountry = "Netherlands",
  thesaurus = "Ising model; Physics computing; Reduced instruction set
    computing",
}

%%% Altevogt:1993:PTD: the label of classification code A0550 itself
%%% contains a semicolon (`Lattice theory and statistics; Ising
%%% problems'), so it must stay inside one pair of parentheses rather
%%% than be treated as two items of the semicolon-separated code list.

@Article{Anonymous:1993:MMP,
  author = "Anonymous",
  title = "{MPI}: a message passing interface",
  journal = j-PROC-SUPERCOMPUT,
  pages = "878--883",
  month = "????",
  year = "1993",
  CODEN = "????",
  ISBN = "0-8186-4340-4",
  ISBN-13 = "978-0-8186-4340-8",
  ISSN = "1063-9535",
  LCCN = "QA76.5 .S894 1993",
  bibdate = "Fri May 24 09:57:40 MDT 1996",
  bibsource = "Compendex database;
    https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract = "This paper presents an overview of MPI, a proposed standard
    message passing interface for MIMD distributed memory concurrent
    computers. The design of MPI has been a collective effort involving
    researchers in the United States and Europe from many organizations
    and institutions. MPI includes point-to-point and collective
    communication routines, as well as support for process groups,
    communication contexts, and application topologies.
While making use of new ideas where appropriate, the MPI standard is
    based largely on current practice.",
  acknowledgement = ack-nhfb,
  classification = "723; 902.2; C6150N (Distributed systems software)",
  conference = "Proceedings of the Supercomputing '93 Conference",
  conferenceyear = "1993",
  fjournal = "Proceedings of the Supercomputing Conference",
  journalabr = "Proc Supercomputing Conf",
  keywords = "Application topologies; application topologies; collective
    communication routines; Collective communication routines;
    collective communication routines; Communication contexts;
    communication contexts; Distributed computer systems; message
    passing; Message passing interface; MIMD distributed memory
    concurrent computers; MPI; MPI standard; MPI standard overview;
    Point-to-point communication; point-to-point communication; process
    groups; Process groups; process groups; software standards;
    standard message passing interface; Standard message passing
    interface; standard message passing interface; Standards",
  meetingaddress = "Portland, OR, USA",
  meetingdate = "Nov 15--19 1993",
  meetingdate2 = "11/15--19/93",
  publisherinfo = "Computer Society Press",
  sponsor = "IEEE Computer Society; ACM SIGARCH",
  sponsororg = "IEEE; ACM SIGARCH",
  treatment = "P Practical",
}

@Article{Anonymous:1993:MPI,
  author = "Anonymous",
  title = "Message-Passing Interface",
  journal = j-IJSA,
  volume = "7",
  number = "2",
  pages = "179--179",
  month = jun,
  year = "1993",
  CODEN = "IJSAE9",
  DOI = "https://doi.org/10.1177/109434209300700208",
  ISSN = "0890-2720",
  bibdate = "Tue Nov 6 11:28:49 2018",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/ijsa.bib;
    https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://journals.sagepub.com/doi/pdf/10.1177/109434209300700208",
  acknowledgement = ack-nhfb,
  fjournal = "The International Journal of Supercomputer Applications",
  journal-URL = "http://hpc.sagepub.com/content/by/year",
}

@TechReport{Arthur:1993:CUA,
  author = "Trey Arthur and Michael J.
Bockelie",
  title = "A comparison of using {APPL} and {PVM} for a parallel
    implementation of an unstructured grid generation problem",
  number = "NASA CR-191425",
  institution = "National Aeronautics and Space Administration, Langley
    Research Center; National Technical Information Service,
    distributor",
  address = "Hampton, VA, USA",
  pages = "??",
  year = "1993",
  bibdate = "Mon Jan 15 15:32:53 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  series = "NASA contractor report",
  acknowledgement = ack-nhfb,
  annote = "Distributed to depository libraries in microfiche. Shipping
    list no.: 93-1026-M. Microfiche. [Washington, DC: National
    Aeronautics and Space Administration, 1993] 1 microfiche.",
  govtdocnumber = "NAS 1.26:191425 0830-H-14 (MF)",
  keywords = "Numerical grid generation (Numerical analysis)",
}

@InProceedings{Arthur:1993:PIU,
  author = "T. Arthur and M. Bockelie",
  title = "A Parallel Implementation of the Unstructured Grid Generation
    Program {VGRIDSG} Using {PVM} and {APPL}",
  crossref = "Sincovec:1993:SCP",
  pages = "899--902",
  year = "1993",
  bibdate = "Thu Feb 29 17:59:11 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}

%%% Baiardi:1993:PVM: the title's first word is typeset as a braced
%%% math superscript (P-cubed-M) so that it survives style recasing.
%%% NOTE(review): the previous spelling `P03M' looked like a flattened
%%% superscript; confirm the system name against the proceedings.

@Article{Baiardi:1993:PVM,
  author = "F. Baiardi and M. Jazayeri",
  title = "{P$^3$M}: a Virtual Machine Approach to Massively Parallel
    Computing",
  journal = j-PROC-INT-CONF-PAR-PROC,
  pages = "I-340--??",
  month = "????",
  year = "1993",
  CODEN = "PCPADL",
  ISSN = "0190-3918",
  LCCN = "QA76.6.I548a",
  bibdate = "Mon Jan 15 15:32:53 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Proceedings of the International Conference on Parallel
    Processing",
}

@InProceedings{Baraglia:1993:PWC,
  author = "R. Baraglia and D. Laforenza and R.
Perego",
  title = "Programming a workstation cluster with {PVM} and {Linda}: a
    qualitative and quantitative comparison",
  crossref = "Anonymous:1993:ISA",
  pages = "101--114",
  year = "1993",
  bibdate = "Thu Feb 29 17:59:11 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}

@InProceedings{Barth:1993:CNM,
  author = "N. H. Barth and S. L. Smith",
  title = "Coupling Numerical Models of the Atmosphere and Ocean Using the
    Parallel Virtual Machine ({PVM}) Package",
  crossref = "Sincovec:1993:SCP",
  pages = "71--75",
  year = "1993",
  bibdate = "Thu Feb 29 17:59:11 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}

@InProceedings{Bedrosian:1993:MFA,
  author = "G. Bedrosian and R. W. Benway",
  title = "Magnetostatic finite-element analysis on {MIMD\slash DMMP}
    parallel computers",
  crossref = "Yelon:1993:PTS",
  journal = j-J-APPL-PHYS,
  volume = "73",
  number = "10",
  pages = "6772--6777",
  year = "1993",
  CODEN = "JAPIAU",
  ISSN = "0021-8979 (print), 1089-7550 (electronic), 1520-8850",
  ISSN-L = "0021-8979",
  bibdate = "Sun Dec 22 10:17:40 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "GE Corporate Research and Development, Schenectady, NY,
    USA",
  classification = "A0260 (Numerical approximation and analysis); A4110D
    (Electrostatics, magnetostatics); B0290T (Finite element analysis);
    B5120 (Magnetostatics)",
  fjournal = "Journal of Applied Physics",
  journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=4915369",
  keywords = "Distributed-memory; H3D; In-house magnetostatic
    finite-element analysis code; Intel iPSC/860 Hypercube; Local area
    network; Message-passing; MIMD/DMMP parallel computers; Multiple
    closely coupled CPUs; Multiple-data; Multiple-instruction; Networks
    of heterogeneous workstations; Parallel virtual machine; Porting;
    Supercomputers",
  thesaurus = "Finite element analysis; Magnetic fields",
}
@InProceedings{Beguelin:1993:PEC, author = "A. Beguelin and J. Dongarra and A. Geist and R. Manchek and S. Otto and J. Walpole", title = "{PVM}: {Experiences}, current status and future direction", crossref = "IEEE:1993:PSP", pages = "765--766", year = "1993", bibdate = "Thu Apr 16 08:51:18 1998", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Sch. of Comput. Sci., Carnegie Mellon Univ., Pittsburgh, PA, USA", classification = "C6110P (Parallel programming); C6150N (Distributed systems software); C7320 (Physics and chemistry computing); C7410D (Electronic engineering computing)", corpsource = "Sch. of Comput. Sci., Carnegie Mellon Univ., Pittsburgh, PA, USA", keywords = "circuit analysis; Circuit analysis; computational problems; Computational requirements; computational requirements; computing; computing requirements; Computing requirements; concurrent; Concurrent computing; concurrent computing; concurrent processing; Concurrent processing; electronic engineering computing; engineering design; Engineering design; Hardware multiprocessors; hardware multiprocessors; high-; High-performance applications; high-performance applications; Integration aspects; integration aspects; material sciences; Material sciences; multiprocessing programs; package; parallel processors; Parallel processors; parallel programming; parallel virtual machine; Parallel virtual machine; performance applications; Physical sciences; physical sciences; physics computing; PVM; scientific; Scientific computational problems; scientific computational problems; Simulation; simulation; software; Software package; software package; software packages", sponsororg = "IEEE; ACM SIGARCH", treatment = "P Practical", } @InCollection{Beguelin:1993:PHT, author = "A. Beguelin and J. Dongarra and A.
Geist and R. Manchek and K. Moore and V. Sunderam", editor = "J. S. Kowalik and L. Grandinetti", title = "{PVM} and {HeNCE}: Tools for Heterogeneous Network Computing", crossref = "Kowalik:1993:SPC", pages = "??--??", year = "1993", bibdate = "Tue Feb 26 10:10:44 2002", bibsource = "ftp://ftp.ira.uka.de/pub/bibliography/Parallel/Par.Arch.Indep.bib; https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; Parallel/Par.Arch.Indep.bib", acknowledgement = ack-nhfb, } @Article{Beguelin:1993:VDH, author = "Adam Beguelin and Jack Dongarra and Al Geist and V. Sunderam", title = "Visualization and Debugging in a Heterogeneous Environment", journal = j-COMPUTER, volume = "26", number = "6", pages = "88--95", month = jun, year = "1993", CODEN = "CPTRB4", ISSN = "0018-9162 (print), 1558-0814 (electronic)", ISSN-L = "0018-9162", bibdate = "Sun Dec 22 10:17:40 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; UnCover library database", abstract = "A monitoring tool and a graphical interface working on top of the PVM software can help programmers make better use of heterogeneous networks of computers.", acknowledgement = ack-nhfb, affiliation = "Sch. of Comput. 
Sci., Carnegie Mellon Univ., Pittsburgh, PA, USA", classification = "C6115 (Programming support); C6150G (Diagnostic, testing, debugging and evaluating systems); C6150N (Distributed systems)", fjournal = "Computer", journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=2", keywords = "Debugging; Graphical monitoring package; Graphical programming environment; Hence; Heterogeneous distributed programs; Heterogeneous environment; Parallel virtual machine; Program visualisation; Xab", thesaurus = "Multiprocessing programs; Open systems; Parallel programming; Program debugging; Software tools; System monitoring; Virtual machines; Visual programming", } @InProceedings{Beguelin:1993:XAT, author = "Adam Beguelin", title = "Xab: a Tool for Monitoring {PVM} Programs", crossref = "IEEE:1993:WHP", pages = "92--97", year = "1993", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; Parallel/debug_3.1.bib", } @TechReport{Beguelin:1993:XTMa, author = "Adam L. Beguelin", title = "Xab: a tool for monitoring {PVM} programs", type = "Research paper", number = "CMU-CS-93-164", institution = inst-SCS-CMU, address = inst-SCS-CMU:adr, pages = "8", year = "1993", bibdate = "Mon Jan 15 15:32:53 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; Techreports/tr.misc.bib", abstract = "Xab (X-window Analysis and deBugging) is a tool for run time monitoring of PVM (Parallel Virtual Machine) programs. PVM supports the programming of a network of heterogeneous computers as a single parallel computer. Using Xab, PVM programs can easily be instrumented and monitored. Xab uses PVM to monitor PVM programs. This makes Xab very portable but it leads to interesting issues of how to make Xab peacefully coincide with the programs it monitors. Xab consists of three main components, a user library, a monitoring program, and an X windows front end. The user library provides instrumented versions of the PVM calls. 
The monitoring program runs as a PVM process and gathers monitor events in the form of PVM messages. The Xab front end displays information graphically about PVM processes and messages. This paper discusses the design, implementation, and use of the Xab tool. Related work is briefly presented and contrasted with the approach taken with Xab. How Xab works and how it is used are discussed in detail. Finally, the current status of Xab is presented along with future directions of where the research may go from here.", acknowledgement = ack-nhfb, annote = "This paper also appears in the proceedings of the April 1993 Workshop on Heterogeneous Processing, IEEE Computer Society Press. June 2, 1993.", keywords = "Debugging in computer science; Parallel programming (Computer science)", } @InProceedings{Beguelin:1993:XTMb, author = "A. L. Beguelin", title = "Xab: a tool for monitoring {PVM} programs", crossref = "Mudge:1993:PTS", volume = "2", pages = "102--103 (vol. 2) (or 4--??)", year = "1993", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Sch. of Comput. Sci., Carnegie Mellon Univ., Pittsburgh, PA, USA", classification = "C6115 (Programming support); C6150G (Diagnostic, testing, debugging and evaluating systems); C7430 (Computer engineering)", corpsource = "Sch. of Comput. 
Sci., Carnegie Mellon Univ., Pittsburgh, PA, USA", keywords = "Feedback; feedback; Heterogeneity; heterogeneity; heterogeneous; Heterogeneous multiprogramming environment; Monitoring PVM programs; monitoring PVM programs; multiprogramming; multiprogramming environment; Parallel virtual machine; parallel virtual machine; performance evaluation; program testing; Run time monitoring tool; run time monitoring tool; software tools; virtual machines; Xab", sponsororg = "ACM; IEEE", thesaurus = "Multiprogramming; Performance evaluation; Program testing; Software tools; Virtual machines", treatment = "P Practical", } @InProceedings{Castro-Leon:1993:MCP, author = "E. Castro-Leon", title = "A model of computation with parallel solvers", crossref = "Anonymous:1993:SEC", pages = "189--198", year = "1993", bibdate = "Sun Dec 22 10:17:40 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Intel Supercomputer Syst. Div., Beaverton, OR, USA", classification = "C6110P (Parallel programming); C6115 (Programming support)", keywords = "Data parallel programming; Distributed memory computers; Message passing; Parallel libraries; Parallel solvers; Performance; Programming environments; Rehosting", thesaurus = "Distributed memory systems; Parallel programming; Programming environments", } @MastersThesis{Cavender:1993:APV, author = "Mark Edward Cavender", title = "Asynchronous parallel virtual machine", type = "M.S. thesis", school = "University of Texas at San Antonio. Division of Mathematics and Computer Science and Statistics", address = "San Antonio, TX, USA", pages = "vi + 228", year = "1993", bibdate = "Mon Jan 15 18:16:25 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, keywords = "Parallel processing (Electronic computers); Virtual computer systems.", } @InProceedings{Chandrasekharan:1993:RTB, author = "N. Chandrasekharan and V. 
Goel", title = "Ray tracing and binary tree computations using {PVM}", crossref = "Mudge:1993:PTS", volume = "2", pages = "104--105 (vol. 2)", year = "1993", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. Sci., Univ. of Central Florida, Orlando, FL, USA", classification = "C6130B (Graphics techniques); C6150J (Operating systems); C7430 (Computer engineering)", corpsource = "Dept. of Comput. Sci., Univ. of Central Florida, Orlando, FL, USA", keywords = "Binary tree computations; binary tree computations; Computational problems; computational problems; Parallel virtual machine; parallel virtual machine; problem; PVM; ray; Ray tracing; ray tracing; rendering (computer graphics); rendering computer; Rendering computer synthesized images; scheduling; Scheduling technique; scheduling technique; synthesized images; tracing; tree contraction; Tree contraction problem; virtual machines", sponsororg = "ACM; IEEE", thesaurus = "Ray tracing; Rendering [computer graphics]; Scheduling; Virtual machines", treatment = "A Application; P Practical", } @Article{Chatterjee:1993:GLA, author = "S. Chatterjee and J. R. Gilbert and F. J. E. Long and R. Schreiber and S.-H. Teng", title = "Generating local addresses and communication sets for data-parallel programs", journal = j-SIGPLAN, volume = "28", number = "7", pages = "149--158", month = jul, year = "1993", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sun Dec 22 10:17:40 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "NASA Ames Res. 
Center, Moffett Field, CA, USA", classification = "C4220 (Automata theory); C6110P (Parallel programming); C6140D (High level languages)", fjournal = "ACM SIG{\-}PLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", keywords = "Communication sets; Data-parallel languages; Distributed-memory implementations; Fast algorithms; Local memory access sequence; Multidimensional arrays; State machines", thesaurus = "Distributed memory systems; Finite automata; FORTRAN; Parallel programming", } @InProceedings{Colombet:1993:SMI, author = "L. Colombet and L. Desbat and F. Menard", title = "Star Modeling on {IBM RS6000} Networks Using {PVM}", crossref = "IEEE:1993:PIS", pages = "121--128", year = "1993", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "LMC-IMAG, Grenoble, France", classification = "C5220P (Parallel architecture); C5470 (Performance evaluation and testing)", corpsource = "LMC-IMAG, Grenoble, France", keywords = "architectures; evaluation; Heterogeneous networks; heterogeneous networks; heterogeneous parallel; Heterogeneous parallel architectures; IBM RS6000; IBM RS6000 networks; Monte Carlo methods; Monte Carlo radiative transfer code; networks; parallel; parallel architectures; Parallel performances; parallel virtual machine; Parallel virtual machine; performance; performances; PVM; star modelling; Star modelling", sponsororg = "IEEE; Washington State Univ.; NPAC at Syracuse Univ.; ACM; Washington Technol. Center", thesaurus = "Monte Carlo methods; Parallel architectures; Performance evaluation", treatment = "P Practical", } @InProceedings{Coussement:1993:PMO, author = "G. 
Coussement", title = "Parallelization of a mesh optimization code on a {RS\slash 6000} cluster", crossref = "Anonymous:1993:PSE", pages = "185--212", year = "1993", bibdate = "Sun Dec 22 10:18:08 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Aerodynamics Dept., Office Nat. d'Etudes et de Recherches Aerospatiales, Chatillon, France", classification = "C1180 (Optimisation techniques); C6110P (Parallel programming)", keywords = "Code structure; Communication protocol; IBM RS/6000; Multi-domain structured mesh optimization code; OPTIM3D; Parallelization effort; PVM; Three-dimensional mesh optimization method", thesaurus = "IBM computers; Optimisation; Parallel programming", } @Article{Culler:1993:LTR, author = "David E. Culler and Richard M. Karp and David A. Patterson and Abhijit Sahay and Klaus E. Schauser and Eunice Santos and Ramesh Subramonian and Thorsten von Eicken", title = "{LogP}: towards a realistic model of parallel computation", journal = j-SIGPLAN, volume = "28", number = "7", pages = "1--12", month = jul, year = "1993", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 14 18:49:37 MST 1995", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Div. of Comput.
Sci., California Univ., Berkeley, CA, USA", classification = "C5440 (Multiprocessor systems and techniques); C6110P (Parallel programming); C7430 (Computer engineering)", confdate = "19-22 May 1993", conflocation = "San Diego, CA, USA", confsponsor = "ACM", fjournal = "ACM SIG{\-}PLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", keywords = "CM-5; Communication bandwidth; Communication delay; Computing bandwidth; LogP; Machine configuration; Machine designers; Parallel computers; Parallel machine model; Portable parallel algorithms", thesaurus = "Parallel algorithms; Parallel machines; Parallel programming; Virtual machines", } @InProceedings{daCunha:1993:PLA, author = "R. D. da Cunha and T. Hopkins", title = "Porting linear algebra subroutines from transputers to clusters of workstations", crossref = "Grebe:1993:TAS", pages = "660--667", year = "1993", bibdate = "Sun Dec 22 10:18:08 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Comput. Lab., Kent Univ., Canterbury, UK", classification = "C5220P (Parallel architecture); C5440 (Multiprocessor systems and techniques); C6110B (Software engineering techniques); C6150N (Distributed systems); C7310 (Mathematics)", keywords = "Fortran77; Linear algebra subroutines; Message-passing system; Occam2; Parallel Virtual Machine; PVM; Subroutine porting; Transputers; Workstation clusters", thesaurus = "FORTRAN; Linear algebra; Mathematics computing; Message passing; Occam; Software portability; Subroutines; Transputer systems", } @Article{Damodaran-Kamal:1993:NTD, author = "S. K. Damodaran-Kamal and J. M. 
Francioni", title = "Nondeterminacy: testing and debugging in message passing parallel programs", journal = j-SIGPLAN, volume = "28", number = "12", pages = "118--128", month = dec, year = "1993", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sun Dec 22 10:18:08 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. Sci., Southwestern Louisiana Univ., Lafayette, LA, USA", classification = "C6110P (Parallel programming); C6150G (Diagnostic, testing, debugging and evaluating systems); C6150N (Distributed systems)", fjournal = "ACM SIG{\-}PLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", keywords = "Debugging tool; Mdb; Message passing parallel programs; Nondeterminacy; Parallel program; Program errors; PVM programs; Testing tool", thesaurus = "Message passing; Parallel programming; Program debugging; Program testing", } @InProceedings{Despons:1993:CCP, author = "R. Despons and T. 
Muntean", title = "Constructing correct protocols for a diffusion virtual machine in message passing parallel architectures", crossref = "Grebe:1993:TAS", pages = "465--480", year = "1993", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C5220P (Parallel architecture); C5440 (Multiprocessor systems and techniques); C5640 (Protocols); C6150N (Distributed systems); C7430 (Computer engineering)", corpsource = "IMAG-LGI Lab., Grenoble Univ., France", keywords = "architectures; communication protocols; diffusion protocols; diffusion virtual machine; machines; massively parallel architectures; message passing; parallel; parallel algorithms; parallel applications; parallel architectures; parallel machines; programming environments; protocols; virtual machines", pubcountry = "Netherlands", treatment = "P Practical", } @InProceedings{Dongarra:1993:DSM, author = "J. J. Dongarra and R. Hempel and A. J. G. Hey and D. W. Walker", title = "A draft standard for message passing in a distributed memory environment", crossref = "Hoffmann:1993:PFE", pages = "465--481", year = "1993", bibdate = "Sun Dec 22 10:18:08 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Math. Sci. Sect., Oak Ridge Nat. Lab., TN, USA", classification = "C5220P (Parallel architecture); C5440 (Multiprocessor systems and techniques); C6150N (Distributed systems)", keywords = "C language; Data distribution transformations; Distributed memory environment; Draft standard; Fortran 77; Library interface standard; Message passing; Message Passing Interface 1; Message selectivity; Message type; MPI1; Source process", thesaurus = "Distributed memory systems; Message passing", } @Article{Dongarra:1993:IPF, author = "Jack Dongarra and G. A. Geist and Robert Manchek and V. S. 
Sunderam", title = "Integrated {PVM} Framework Supports Heterogeneous Network Computing", journal = j-COMPUT-PHYS, volume = "7", number = "2", pages = "166--174 (or 166--175??)", month = mar # "--" # apr, year = "1993", CODEN = "CPHYE2", ISSN = "0894-1866 (print), 1558-4208 (electronic)", ISSN-L = "0894-1866", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Tennessee Univ., Knoxville, TN, USA", classification = "C6110P (Parallel programming); C6150N (Distributed systems); C7300 (Natural sciences); C7430 (Computer engineering)", corpsource = "Tennessee Univ., Knoxville, TN, USA", fjournal = "Computers in Physics", keywords = "computer networks; computing; Concurrent applications; concurrent applications; distributed processing; Heterogeneous network computing; heterogeneous network computing; Integrated framework; integrated framework; natural sciences; parallel programming; Parallel virtual machine software; parallel virtual machine software; Scientific computations; scientific computations; virtual machines", thesaurus = "Computer networks; Distributed processing; Natural sciences computing; Parallel programming; Virtual machines", treatment = "G General Review; P Practical", } @TechReport{Dongarra:1993:PUM, author = "J. Dongarra and R. Hempel and A. Hey and D. Walker", title = "A Proposal for a User-Level Message Passing Interface in a Distributed Memory Environment", type = "Technical Report", number = "ORNL/TM-12231", institution = inst-ORNL, address = inst-ORNL:adr, month = feb, year = "1993", bibsource = "ftp://ftp.ira.uka.de/pub/bibliography/Parallel/par.lin.alg.bib; https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; Parallel/par.lin.alg.bib", } @InProceedings{Dongarra:1993:UPR, author = "J. J. Dongarra and A. Geist and R. Manchek and W.
Jiang", title = "Using {PVM} 3.0 to Run Grand Challenge Applications on a Heterogeneous Network of Parallel Computers", crossref = "Sincovec:1993:SCP", pages = "873--877", year = "1993", bibdate = "Thu Feb 29 17:59:11 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Ewing:1993:DCW, author = "R. E. Ewing and D. Mitchum and P. O'Leary and R. C. Sharpley and J. S. Sochacki", title = "Distributed Computation of Wave Propagation Models Using {PVM}", crossref = "IEEE:1993:PSP", pages = "22--31", year = "1993", bibdate = "Wed Apr 15 12:04:03 MDT 1998", bibsource = "Compendex database; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Texas A\&M Univ", affiliationaddress = "College Station, TX, USA", classification = "484.1; 723; 921; C5440 (Multiprocessing systems); C6110P (Parallel programming); C7340 (Geophysics computing); C7430 (Computer engineering)", corpsource = "Inst. for Sci. Comput., Texas A and M Univ., College Station, TX, USA", keywords = "Computer simulation; Computer workstations; distributed computation; distributed memory systems; Earth; geophysics computing; handling large-scale problems; IBM RS/6000s; Large-earth models; large-scale computations; Large-scale problems; nodes; numerical approximation; parallel processing; parallel processing environment; Parallel processing environment; Parallel processing systems; Parallel Virtual Machine; Parallel virtual machine (PVM); PVM; Seismic wave propagation; seismic waves; Seismic waves; supercomputers; timings; virtual machines; visualization; wave propagation; Wave propagation; wave propagation models; Wave propagation models; workstations", sponsororg = "IEEE; ACM SIGARCH", treatment = "P Practical", } @InProceedings{Fritscher:1993:PDC, author = "J. F. Fritscher and F. 
Sukup", title = "{93SC038} Parallel Distributed Computing Using {PVM}", crossref = "Anonymous:1993:ATA", pages = "221--228", year = "1993", bibdate = "Thu Feb 29 17:59:11 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Geist:1993:EPC, author = "G. A. Geist and V. S. Sunderam", title = "The evolution of the {PVM} concurrent computing system", crossref = "IEEE:1993:DPC", pages = "549--557", year = "1993", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Oak Ridge Nat. Lab., TN, USA", classification = "C5440 (Multiprocessor systems and techniques); C6110P (Parallel programming); C7430 (Computer engineering)", corpsource = "Oak Ridge Nat. Lab., TN, USA", keywords = "future; Future trends; high-performance computations; High-performance computations; historical evolution; Historical evolution; networked environments; Networked environments; ongoing research projects; Ongoing research projects; parallel; parallel machines; parallel programming; Parallel programming; Parallel Virtual Machine; programming; programming model; Programming model; PVM concurrent computing system; scientific; Scientific supercomputing; software infrastructure; Software infrastructure; supercomputing; trends; virtual machines", thesaurus = "Parallel machines; Parallel programming; Virtual machines", treatment = "P Practical", } @InProceedings{Geist:1993:ILP, author = "G. A. Geist", title = "Invited Lecture: {PVM} 3 Beyond Network Computing", crossref = "Volkert:1993:PCS", pages = "194--203", year = "1993", bibdate = "Thu Feb 29 17:59:11 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Geist:1993:PBN, author = "G. A. 
Geist", title = "{PVM} 3 beyond network computing", crossref = "Volkert:1993:PCS", pages = "194--203", year = "1993", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Oak Ridge Nat. Lab., TN, USA", classification = "C6115 (Programming support); C6150N (Distributed systems); C7430 (Computer engineering)", corpsource = "Oak Ridge Nat. Lab., TN, USA", keywords = "distributed computing; Distributed computing; distributed memory computer; Distributed memory computer; distributed memory systems; heterogeneous network computing; Heterogeneous network computing; machines; message-; Message-passing constructs; parallel machines; Parallel Virtual Machine; passing constructs; PVM 3; software package; Software package; virtual", pubcountry = "Germany", thesaurus = "Distributed memory systems; Parallel machines; Virtual machines", treatment = "P Practical", } @InProceedings{Geist:1993:PTW, author = "A. Geist and J. Dongarra and A. Beguelin and B. Manchek and Weicheng Jiang", title = "{PVM} takes over the world", crossref = "IEEE:1993:PSP", pages = "618--618", year = "1993", DOI = "https://doi.org/10.1109/SUPERC.1993.1263513", bibdate = "Fri May 27 10:20:49 2005", bibsource = "https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Glendinning:1993:MMP, author = "I. Glendinning", title = "{93SC041} The {MPI} Message Passing Interface", crossref = "Anonymous:1993:ATA", pages = "229--236", year = "1993", bibdate = "Thu Feb 29 17:59:11 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Hariri:1993:MPI, author = "S. Hariri and J. B. Park and F.-K. Yu and M. Parashar and G. C. 
Fox", title = "A message passing interface for parallel and distributed computing", crossref = "IEEE:1993:PIS", pages = "84--91", year = "1993", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "B6150M (Protocols); C5640 (Protocols); C5670 (Network performance)", corpsource = "Northeast Parallel Archit. Center, Syracuse Univ., NY, USA", keywords = "architectural support; communication protocol; distributed computing; distributed processing; gigabit networks; message passing; message passing interface; parallel computing; parallel processing; performance evaluation; protocols; supercomputing capabilities", sponsororg = "IEEE; Washington State Univ.; NPAC at Syracuse Univ.; ACM; Washington Technol. Center", treatment = "P Practical", } @InProceedings{Hartley:1993:CPS, author = "C. L. Hartley and V. S. Sunderam", title = "Concurrent programming with shared objects in networked environments", crossref = "IEEE:1993:PSI", pages = "471--478", year = "1993", bibdate = "Sun Dec 22 10:18:08 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Math. and Comput. Sci., Emory Univ., Atlanta, GA, USA", classification = "C6110J (Object-oriented programming); C6110P (Parallel programming); C6115 (Programming support); C6150N (Distributed systems)", keywords = "Application development; Concurrent programming; Distributed computing; Ease of use; Message passing; Networked computing platforms; Networked environments; Object-oriented techniques; Partitioning; Portable software systems; PVM distributed computing system; Scheduling; Shared objects; Shared-object concurrent computation; Synchronization; Toolkit", thesaurus = "Multiprocessing programs; Object-oriented programming; Parallel programming; Software tools", } @InProceedings{Hebeker:1993:CPC, author = "F.-K. 
Hebeker", title = "On a coarse-grained parallel code to simulate reactive flows on an {IBM RS\slash 6000} workstation-cluster", crossref = "Brebbia:1993:ASE", pages = "253--262", year = "1993", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "IBM Heidelberg Sci. Center, Germany", classification = "A4710 (General fluid dynamics theory, simulation and other computational methods); A4770F (Chemically reactive flows); C6110P (Parallel programming); C7440 (Civil and mechanical engineering computing); C7450 (Chemical engineering computing)", keywords = "Algorithmic development; Chemical source terms; Coarse-grained parallel code; Compressible flow; Compressible Navier--Stokes equations; Domain splitting techniques; Engineering-mathematical modelling; Global exothermic reaction chemistry; IBM RS/6000 workstation cluster; Internal combustion engines; Knock damage; Message passing; Numerical simulation; Optimally adapted code; Performance measurements; PVM programming environment; Reactive flow simulation; Semi-implicit treatment; Shock-capturing finite-volume scheme", thesaurus = "Chemical engineering computing; Chemically reactive flow; Digital simulation; Distributed algorithms; Flow simulation; IBM computers; Internal combustion engines; Mechanical engineering computing; Message passing; Parallel programming", } @Article{Jesshope:1993:LRV, author = "C. Jesshope", title = "Latency reduction in {VLSI} routers", journal = j-PARALLEL-PROCESS-LETT, volume = "3", number = "4", pages = "485--494", month = dec, year = "1993", CODEN = "PPLTEE", ISSN = "0129-6264 (print), 1793-642X (electronic)", ISSN-L = "0129-6264", bibdate = "Sun Dec 22 10:18:08 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Electron. and Electr.
Eng., Surrey Univ., Guildford, UK", classification = "C5220P (Parallel architecture); C5470 (Performance evaluation and testing)", fjournal = "Parallel Processing Letters", journal-URL = "http://www.worldscientific.com/loi/ppl", keywords = "Latency reduction; MPI router chip; Parallel computers; Scalable performance; VLSI routers", pubcountry = "Singapore", thesaurus = "Fault tolerant computing; Parallel architectures; Performance evaluation; VLSI", } @InProceedings{Jesshope:1993:MCA, author = "C. Jesshope", title = "The {MPI} Chip and its Applications", crossref = "Anonymous:1993:JFI", pages = "47--54", year = "1993", bibdate = "Thu Feb 29 17:59:11 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @MastersThesis{Katamneni:1993:PPE, author = "Sreevenu Katamneni", title = "Parallel processing extensions to {Verilog HDL} using the {PVM} environment", type = "M.S.E.E. thesis", school = inst-UAL-EE, address = inst-UAL-EE:adr, pages = "viii + 108", year = "1993", bibdate = "Mon Jan 15 18:16:30 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, keywords = "Computer hardware description languages.; Parallel processing (Electronic computers); Verilog (Computer hardware description language); Virtual computer systems.", } @Article{Kikuchi:1993:PAS, author = "S. Kikuchi", title = "Parallelization assist system", journal = j-JOHO-SHORI, volume = "34", number = "9", pages = "1158--1169", month = sep, year = "1993", CODEN = "JOSHA4", ISSN = "0447-8053", bibdate = "Sun Dec 22 10:17:40 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Central Res. Lab., Hitachi Ltd, Tokyo, Japan", classification = "C6110P (Parallel programming); C6115 (Programming support)", fjournal = "Joho-Shori (J. Information Processing Soc. 
Japan)", keywords = "ASPAR; Express; Flow-insensitive systems; Flow-sensitive systems; FORGE90; KAP; Message passing interface standards; MIMDizer; ParaGraph; Parallel Fortran Converter; Parallelization assistance system; ParaScope Editor; Parassist; Perfect club benchmarks; PIE; Portable Instrumented Communication Library; Profiling tools; PTOOL; SUPERB; SUPRENUM FORTRAN; Transformations; VAST", language = "Japanese", pubcountry = "Japan", thesaurus = "FORTRAN; Parallel programming; Reduced instruction set computing; Software tools", } @Article{Kranz:1993:IMP, author = "David Kranz and Kirk L. Johnson and Anant Agarwal and John Kubiatowicz and Beng-Hong Lim", title = "Integrating message-passing and shared-memory: early experience", journal = j-SIGPLAN, volume = "28", number = "7", pages = "54--63", month = jul, year = "1993", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Dec 14 18:49:37 MST 1995", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "A discussion is given on some of the issues involved in implementing a shared-address space programming model on large-scale, distributed-memory multiprocessors. While such a programming model can be implemented on both shared-memory and message-passing architectures, the authors argue that the transparent, coherent caching of global data provided by many shared-memory architectures is of crucial importance. Because message-passing mechanisms are much more efficient than shared-memory loads and stores for certain types of interprocessor communication and synchronization operations, however, the authors argue for building multiprocessors that efficiently support both shared-memory and message-passing mechanisms.
The authors describe an architecture, Alewife, that integrates support for shared-memory and message-passing through a simple interface; they expect the compiler and runtime system to cooperate in using appropriate hardware mechanisms that are most efficient for specific operations. They report on both integrated and exclusively shared-memory implementations of the runtime system and two applications.", acknowledgement = ack-nhfb, affiliation = "Lab. for Comput. Sci., MIT, Cambridge, MA, USA", classification = "C5440 (Multiprocessor systems and techniques); C6110P (Parallel programming)", confdate = "19-22 May 1993", conflocation = "San Diego, CA, USA", confsponsor = "ACM", fjournal = "ACM SIG{\-}PLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", keywords = "Alewife; Coherent caching; Compiler; Distributed-memory multiprocessors; Exclusively shared-memory implementations; Global data; Hardware mechanisms; Interprocessor communication; Message-passing architectures; Message-passing mechanisms; Runtime system; Shared-address space programming model; Shared-memory architectures; Shared-memory loads; Synchronization operations", thesaurus = "Message passing; Parallel programming; Shared memory systems", } @TechReport{Leon:1993:FPA, author = "J. Leon and A. L. Fisher and P. Steenkiste", title = "Fail-safe {PVM}: a portable package for distributed programming with transparent recovery", number = "CMU-CS-93-124", institution = "Carnegie-Mellon University, Department of Computer Science", year = "1993", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; Techreports/tr.misc.bib", } @TechReport{Leon:1993:FPP, author = "Juan Leon and Allan L. 
Fisher and Peter Alfons Steenkiste", title = "Fail-safe {PVM}: a portable package for distributed programming with transparent recovery", institution = inst-SCS-CMU, address = inst-SCS-CMU:adr, pages = "22", year = "1993", bibdate = "Mon Jan 15 15:32:53 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = "[Research paper] / Carnegie Mellon University. School of Computer Science; CMU-CS-93-124 Research paper (Carnegie Mellon University. School of Computer Science); CMU-CS-93-124", abstract = "``Many scientific problems benefit from computations that are parallel at a coarse grain. Collections of loosely-coupled, heterogeneous computers are increasingly being applied to these problems. While individual computers are designed to be relatively reliable, a collection of several autonomous machines necessarily has a greater rate of failure. As data networks improve, and larger multicomputers are being used, rates of failure will increase. PVM (Parallel Virtual Machine) [Sun90, GS92] is a popular software framework that facilitates message-passing network programming. We present enhancements to PVM to mask fail-stop, single-node failures from the application. Fail-safe PVM uses checkpoint and rollback to recover from such failures. Both checkpoints and rollbacks are transparent to the application if the application does not depend on real-time events. Recovery occurs without wait for repair of the failed computer. The system does not rely on shared stable storage and does not require modifications to the operating system. We describe the design and implementation of fail-safe PVM, present meassurements [sic] of checkpoint costs, and briefly discuss shortcomings and potential avenues for improvement.'' Supported in part by the Defense Advanced Research Projects Agency, issued by DARPA/CMO.", acknowledgement = ack-nhfb, annote = "February 1993.", keywords = "Fault-tolerant computing", } @InProceedings{Levesque:1993:SAA, author = "J. M.
Levesque and R. Friedman", title = "The state of the art in automatic parallelisation", crossref = "Anonymous:1993:SEC", pages = "95--107", year = "1993", bibdate = "Sun Dec 22 10:17:40 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Appl. Parallel Res. Inc., Placerville, CA, USA", classification = "C6110P (Parallel programming)", keywords = "Automatic parallelisation; Data Distribution Directives; Distributed memory; Fortran programs; Parallelization; Shared memory; User assistance", thesaurus = "FORTRAN; Parallel programming", } @InProceedings{Lewis:1993:PCP, author = "M. J. Lewis and R. E. {Cline, Jr.}", title = "{PVM} Communication Performance in a Switched {FDDI} Heterogeneous Distributed Computing Environment", crossref = "Bhargava:1993:PIW", pages = "13--19", year = "1993", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Sandia Nat. Labs., Livermore, CA, USA", classification = "C5440 (Multiprocessing systems); C5470 (Performance evaluation and testing); C5620L (Local area networks); C5640 (Protocols)", corpsource = "Sandia Nat. Labs., Livermore, CA, USA", keywords = "distributed processing; distributed systems; Distributed systems; FDDI; heterogeneous distributed computing; Heterogeneous distributed computing; local area networks; machines; message; message passing system; Message passing system; parallel; parallel machines; Parallel programs; Parallel Virtual Machine; passing; performance evaluation; programs; PVM; switched FDDI; Switched FDDI; virtual", sponsororg = "IEEE", thesaurus = "Distributed processing; FDDI; Local area networks; Message passing; Parallel machines; Performance evaluation; Virtual machines", treatment = "P Practical", } @InProceedings{Li:1993:MSU, author = "Q. Li and T. G. 
Yip", title = "Monitoring Systems Using {PVM}", crossref = "Law:1993:EDM", pages = "781--785", year = "1993", bibdate = "Thu Feb 29 17:59:11 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Li:1993:SLL, author = "Q. Li and J.-C. Liu and T. G. Yip", title = "Solving Large Linear Equations Using {PVM} System", crossref = "Law:1993:EDM", pages = "685--690", year = "1993", bibdate = "Thu Feb 29 17:59:11 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Loyot:1993:VVM, author = "E. C. {Loyot, Jr.} and A. S. Grimshaw", title = "{VMPP}: a virtual machine for parallel processing", crossref = "IEEE:1993:PSI", pages = "735--740", year = "1993", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C6110P (Parallel programming); C6150C (Compilers, interpreters and other processors); C7430 (Computer engineering)", corpsource = "Dept. of Comput. Sci., Virginia Univ., Charlottesville, VA, USA", keywords = "front-end translators; parallel languages; parallel processing; parallel source languages; portability; program interpreters; software; virtual machine; virtual machines; VMPP", sponsororg = "IEEE Comput. Soc.; ACM Sigarch", treatment = "P Practical", } @InProceedings{Maly:1993:DCP, author = "K. Maly and M. Zubair and S. Kelbar", title = "Distributed computing with parallel networking", crossref = "IEEE:1993:PFW", pages = "375--379", year = "1993", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Comput. Sci. 
Dept., Old Dominion Univ., Norfolk, VA, USA", classification = "B6150M (Protocols); B6210L (Computer communications); C5620L (Local area networks); C5640 (Protocols); C5670 (Network performance)", keywords = "Application performance; Communication network; Communication network performance; Dedicated parallel machine; Distributed computing; Ethernet; Parallel networking; Parallel virtual machine environment; PPVM; PVM; Round robin scheduling", thesaurus = "Local area networks; Performance evaluation; Protocols; Scheduling", } @Article{Matrone:1993:LPC, author = "A. Matrone and P. Schiano and V. Puoti", title = "{LINDA} and {PVM}: a comparison between two environments for parallel programming", journal = j-PARALLEL-COMPUTING, volume = "19", number = "8", pages = "949--957", month = aug, year = "1993", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "Short communication.", acknowledgement = ack-nhfb, affiliation = "Centro Italiano Ricerche Aerospaziali, Capua, Italy", classification = "C6110P (Parallel programming); C6115 (Programming support)", corpsource = "Centro Italiano Ricerche Aerospaziali, Capua, Italy", fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", keywords = "Distributed memory machines; distributed memory machines; environments; LINDA; Message passing; message passing; Parallel programming; parallel programming; programming; Programming environments; programming environments; PVM; RISC/6000", pubcountry = "Netherlands", thesaurus = "Parallel programming; Programming environments", treatment = "P Practical", } @Article{McKinney:1993:MMI, author = "G. W. McKinney and J. T. 
West", title = "Multiprocessing {MCNP} on an {IBM RS\slash 6000} cluster", journal = j-TRANS-AM-NUCL-SOC, volume = "68", number = "pt.A", pages = "212--214", year = "1993", CODEN = "TANSAO", ISSN = "0003-018X", bibdate = "Sun Dec 22 10:17:40 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Los Alamos Nat. Lab., NM, USA", classification = "A0250 (Probability theory, stochastic processes, and statistics); A0270 (Computational techniques); A0560 (Transport processes: theory); A2820H (Neutron diffusion); A2841C (Computer codes); C7320 (Physics and Chemistry); C7470 (Nuclear engineering)", fjournal = "Transactions of the American Nuclear Society", keywords = "Electron transport; IBM RS/6000 cluster; MCNP; Monte Carlo; Multiuser environment; Neutron transport; Parallel Virtual Machine; Photon transport; PVM version; Reduced Instruction Set Computer; Workstation cluster", thesaurus = "Monte Carlo methods; Neutron transport theory; Nuclear engineering computing; Photon transport theory; Physics computing; Transport processes", } @Article{Michielse:1993:PMU, author = "P.
Michielse", title = "Parallel multigrid using {PVM}", journal = j-SUPERCOMPUTER, volume = "10", number = "6", pages = "10--23", month = "????", year = "1993", CODEN = "SPCOEL", ISSN = "0168-7875", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Convex Computer, Utrecht, Netherlands", classification = "C4170 (Differential equations); C4240P (Parallel programming and algorithm theory); C5440 (Multiprocessor systems and techniques)", corpsource = "Convex Computer, Utrecht, Netherlands", fjournal = "Supercomputer", keywords = "algorithms; Convex; Convex MetaSeries machines; differential equations; distributed memory systems; Distributed memory systems; MetaSeries machines; Parallel; parallel; parallel machines; parallel multigrid method; Parallel multigrid method; Parallel Virtual Machine; PVM; shared memory systems; Shared memory systems; Virtual Machine; virtual machines", pubcountry = "Netherlands", thesaurus = "Differential equations; Distributed memory systems; Parallel algorithms; Parallel machines; Shared memory systems; Virtual machines", treatment = "P Practical", } @Article{Nanayakkara:1993:PIR, author = "A. Nanayakkara and D. Moncrieff and S. Wilson", title = "Performance of {IBM RISC System\slash 6000} workstation clusters in a quantum chemical application", journal = j-PARALLEL-COMPUTING, volume = "19", number = "9", pages = "1053--1062", month = sep, year = "1993", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Sun Dec 22 10:17:40 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Supercomputer Comput. Res. 
Inst., Florida State Univ., Tallahassee, FL, USA", classification = "C5430 (Microcomputers); C5470 (Performance evaluation and testing); C7320 (Physics and Chemistry)", fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", keywords = "Concurrent computation many-body perturbation theory; CRAY Y-MP C-90; Electron correlation energy calculations; IBM RISC System/6000 workstation clusters; NEC SX-3/44 computers; Parallel virtual machine system; Performance", pubcountry = "Netherlands", thesaurus = "Chemistry computing; IBM computers; Performance evaluation; Quantum chemistry; Reduced instruction set computing; Workstations", } @Article{Nelson:1993:PPP, author = "M. L. Nelson", title = "{PVM} provides power in the public domain", journal = j-PARALLELOGRAM, volume = "53", pages = "20--21", month = may # "--" # jun, year = "1993", CODEN = "PRALEH", ISSN = "0953-7252", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C5440 (Multiprocessor systems and techniques); C6150N (Distributed systems); C7430 (Computer engineering)", fjournal = "Parallelogram", keywords = "de; De facto standard; distributed computing; Distributed computing; ease-of-use; Ease-of-use; facto standard; heterogeneous computer network; Heterogeneous computer network; maintenance; Maintenance; message passing; message-passing system; Message-passing system; parallel machines; parallel programming; Parallel virtual machine; performance; Performance; public domain software; Public domain software; PVM; robustness; Robustness; software packages; virtual machines", pubcountry = "UK", thesaurus = "Message passing; Parallel machines; Parallel programming; Public domain software; Software packages; Virtual machines", treatment = "P Practical; R Product Review", } @TechReport{Oed:1993:CRM, author = "Wilfried Oed", title = "The {Cray Research} Massively Parallel 
Processor System {CRAY T3D}", institution = "Cray Research GmbH", address = "M{\"u}nchen, Germany", month = nov # " 15", year = "1993", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; Parallel/Parallel.io.bib", comment = "A MIMD, shared-memory machine, with 2-processor units embedded in a 3-d torus. Each link is bidirectional and runs 300 MB/s. Processors are 150 MHz ALPHA, plus 16--64 MB RAM, plus a memory interface unit. Global physical address space with remote-reference and block-transfer capability. Not clear about cache coherency. Separate tree network for global synchronization. Support for message send and optional interrupt. I/O is all done through interface nodes that hook to the YMP host and to its I/O clusters with 400 MB/s links. I/O is by default serialized, but they do support a ``broadcast'' read operation (but see pase:t3d-fortran). FORTRAN compiler supports the NUMA shared memory; PVM is used for C and message passing.", keyword = "parallel architecture, shared memory, supercomputer, parallel I/O, pario bib", } @Article{Otto:1993:PAC, author = "S. W. Otto", title = "Parallel array classes and lightweight sharing mechanisms", journal = j-SCI-PROG, volume = "2", number = "4", pages = "203--216", month = "Winter", year = "1993", CODEN = "SCIPEV", ISSN = "1058-9244 (print), 1875-919X (electronic)", ISSN-L = "1058-9244", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. Sci. and Eng., Oregon Graduate Inst. of Sci. 
and Technol., Beaverton, OR, USA", classification = "C5220P (Parallel architecture); C5440 (Multiprocessing systems); C6110J (Object-oriented programming); C6110P (Parallel programming); C6120 (File organisation); C6150N (Distributed systems software)", fjournal = "Scientific Programming", journal-URL = "http://iospress.metapress.com/content/1058-9244", keywords = "C++; Collective object; Distributed memory; Distributed memory architectures; Finite difference stencils; Finite element method; Intel NX message passing systems; Interpolation/contraction operations; Lightweight sharing mechanisms; Low level message passing; Meaningful array operations; MetaMP; Multigrid algorithms; Parallel array classes; Particle in cell algorithms; Partitioned array; PVM; Shared memory architectures; Transparent guard strips; Weak memory coherence", thesaurus = "Abstract data types; Distributed memory systems; Message passing; Object-oriented programming; Parallel programming", } @InProceedings{Parsons:1993:EDC, author = "I. Parsons", title = "Evaluation of distributed communication systems", crossref = "Gawman:1993:PCT", pages = "956--970 vol.2", year = "1993", bibdate = "Sun Dec 22 10:18:08 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. 
Sci., Alberta Univ., Edmonton, Alta., Canada", classification = "C0310H (Equipment and software evaluation methods); C6110P (Parallel programming); C6115 (Programming support); C6150G (Diagnostic, testing, debugging and evaluating systems); C6150N (Distributed systems software)", keywords = "Balancing act; Communication systems; Concert/C; Distributed communication systems; Distributed parallel programs; Enterprise project; Handcrafted code; ISIS; Network of workstations; NMP; Programming environment; PVM; Software engineers", thesaurus = "Network operating systems; Parallel programming; Program testing; Programming environments; Software selection", } @MastersThesis{Patterson:1993:PPE, author = "Christopher S. Patterson", title = "Parametric Positron Emission Tomographic imaging using Parallel Virtual Machine: with an example using Myocardial Blood Flow analysis", type = "M.S. thesis", school = inst-UTK, address = inst-UTK:adr, pages = "x + 132", year = "1993", bibdate = "Mon Jan 15 15:32:53 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, keywords = "Blood flow --- Measurement.; Tomography, Emission.; Virtual computer systems", } @InProceedings{Rabenseifner:1993:CDR, author = "R. Rabenseifner and A. Schuch", title = "Comparison of {DCE RPC}, {DFN-RPC}, {ONC} and {PVM}", crossref = "Schill:1993:DOD", pages = "39--46", year = "1993", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Rechenzentrum, Stuttgart Univ., Germany", classification = "C5670 (Network performance); C6115 (Programming support); C6150N (Distributed systems software); C6150N (Distributed systems)", conflocation = "Karlsruhe, Germany; 7-8 Oct. 1993", conftitle = "International DCE Workshop. 
DCE --- The OSF Distributed Computing Environment Client/Server Model and Beyond", corpsource = "Rechenzentrum, Stuttgart Univ., Germany", keywords = "account; applications; C; C applications; calls; Capability; capability; Computer server; computer server; DCE RPC; DFN-RPC; Early Participation; Early Participation Program; FORTRAN; Fortran applications distribution; Functionality; functionality; German Research Network; German Research Network Society; IBM computers; IBM RS/6000 workstations; message passing; Message passing library; message passing library; network servers; ONC; open systems; OSF Distributed Computing Environment; Parallelization; parallelization; Performance; performance; performance evaluation; Program; PVM; remote procedure; Remote procedure calls; scientific-technical; Scientific-technical applications; Society; software tools; SUN RPC; System programming tool; system programming tool; systems analysis; Unix; UNIX computer network; user-; User-account; workstations", pubcountry = "Germany", thesaurus = "FORTRAN; IBM computers; Message passing; Network servers; Open systems; Performance evaluation; Remote procedure calls; Software tools; Systems analysis; Unix; Workstations", treatment = "P Practical", } @Article{Robinson:1993:ECD, author = "D. F. Robinson and D. Judd and P. K. McKinley and B. H. C. Cheng", title = "Efficient collective data distribution in all-port wormhole-routed hypercubes", journal = j-PROC-SUPERCOMPUT, pages = "792--801", month = "????", year = "1993", CODEN = "????", ISBN = "0-8186-4340-4", ISBN-13 = "978-0-8186-4340-8", ISSN = "1063-9535", LCCN = "QA76.5 .S894 1993", bibdate = "Fri May 24 09:57:40 MDT 1996", bibsource = "Compendex database; https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "This paper addresses the problem of collective data distribution, specifically multicast, in wormhole-routed hypercubes. The system model allows a processor to send and receive data in all dimensions simultaneously.
New theoretical results that characterize contention among messages in wormhole-routed hypercubes are developed and used to design new multicast routing algorithms. The algorithms are compared in terms of the number of steps required in each, their measured execution times when implemented on a relatively small-scale nCUBE-2, and their simulated execution times on larger hypercubes.", acknowledgement = ack-nhfb, affiliation = "Michigan State Univ", affiliationaddress = "East Lansing, MI, USA", classification = "723", conference = "Proceedings of the Supercomputing '93 Conference", conferenceyear = "1993", fjournal = "Proceedings of the Supercomputing Conference", journalabr = "Proc Supercomputing Conf", keywords = "Algorithms; Message passing interface (MPI); Multicast routing algorithms; Parallel processing systems; Small-scale nCUBE-2; Wormhole-routed hypercubes", meetingaddress = "Portland, OR, USA", meetingdate = "Nov 15--19 1993", meetingdate2 = "11/15--19/93", publisherinfo = "Computer Society Press", sponsor = "IEEE Computer Society; ACM SIGARCH", } @MastersThesis{Sept:1993:DIP, author = "Doug Sept", title = "The design, implementation and performance of a queue manager for {PVM}", type = "M.S. thesis", school = "Computer Science Department, " # inst-UTK, address = inst-UTK:adr, pages = "viii + 45", year = "1993", bibdate = "Mon Jan 15 18:16:36 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = "Technical report CS-93-196: University of Tennessee, Knoxville, Computer Science Department", abstract = "The PVM Queue Manager (QM) application addresses some of the load balancing problems associated with the heterogeneous, multi-user, computing environments for which PVM was designed. In such environments, PVM is not only confronted with the difficulties of distributing tasks among machines of variable loads, it must also contend with machines of varying performance levels in the same virtual machine. 
The QM addresses both of these problems using two different load balancing techniques, one static, the other dynamic. In its simplest (static) mode, the QM will initiate PVM processes for the user on demand, taking into account information such as the peak megaflops/sec and actual load of each machine. In addition to the initiation of processes, the QM will also accept tasks to be completed by a specified PVM process type. These tasks are shipped to the QM where they are kept in a FIFO queue. Worker processes in the virtual machine send idle messages to the QM when they are ready for a task, and the QM ships a task to the process if there is one (of a type matching the process) in the queue. The QM also maintains a list of idle processes and chooses the best one for the task, should one arrive when several processes are idle. Since faster machines typically send more idle messages (and receive more tasks) than slower ones, this provides a level of dynamic load balancing for the system. Three applications have already been implemented using the QM within PVM: a Mandelbrot image generator, a conjugate-gradient algorithm, and a map analysis program used in landscape ecology applications. Benchmarks of elapsed wall-clock time comparing standard PVM versions with the QM-based versions demonstrate substantial performance gains for both methods of load balancing. When processing a 1000 x 1000 image, for example, the QM-based Mandelbrot application averaged 63.92 seconds, compared to 139.62 seconds for the standard PVM version in a heterogeneous [sic] network of five workstations (comprised of Sun4's and IBM RS/6000).", acknowledgement = ack-nhfb, keywords = "Parallel computers.; Queuing theory; Virtual computer systems", } @InProceedings{Simonsen:1993:DMD, author = "H. H. Simonsen and J. 
Amundsen", title = "Distributed Molecular Dynamics Using the {PVM} System", crossref = "Sincovec:1993:SCP", pages = "183--186", year = "1993", bibdate = "Thu Feb 29 17:59:11 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Skjellum:1993:SLH, author = "A. Skjellum", title = "Scalable libraries in a heterogeneous environment", crossref = "IEEE:1993:PIS", pages = "13--20", year = "1993", bibdate = "Sun Dec 22 10:17:40 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. Sci., Mississippi State Univ., MS, USA", classification = "C5440 (Multiprocessor systems and techniques); C6155 (Computer communications software)", keywords = "Communicating processes; Communication contexts; Heterogeneous environment; Heterogeneous network environment; Message-passing features; MPI standard; Multicomputer libraries; Multicomputer toolbox first-generation scalable libraries; Scalable libraries; User program; Zipcode", thesaurus = "Computer communications software; Message passing; Multiprocessing systems", } @Article{Smith:1993:DSI, author = "S. L. 
Smith", title = "Dynamic scheduling of irregularly structured parallel computations in heterogeneous distributed systems", journal = j-SIGPLAN, volume = "28", number = "1", pages = "86", month = jan, year = "1993", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sun Dec 22 10:17:40 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "CERFACS, Toulouse, France", classification = "C6150N (Distributed systems)", fjournal = "ACM SIG{\-}PLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", keywords = "Dynamic centralized scheduling; Global optimization; Heterogeneous computing environments; Heterogeneous distributed systems; Irregularly structured parallel computations; Parallel algorithm; Parallel virtual machine; Performance evaluation; PVM environment; Simulation", thesaurus = "Distributed processing; Parallel programming; Scheduling", } @InProceedings{Smith:1993:MBA, author = "K. A. Smith", title = "Multi-Processor Based Accident Using {PVM}", crossref = "Sincovec:1993:SCP", pages = "262--265", year = "1993", bibdate = "Thu Feb 29 17:59:11 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Sochacki:1993:DCW, author = "J. S. Sochacki and D. Mitchum and P. O'Leary and R. E. Ewing", title = "Distributed Computation of Wave Propagation Models Using {PVM}", crossref = "IEEE:1993:PSP", pages = "22--33", year = "1993", bibdate = "Thu Feb 29 17:59:11 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Sunderam:1993:PCC, author = "V. 
Sunderam", title = "The {PVM} Concurrent Computing System", crossref = "Anonymous:1993:CDP", pages = "20--84", year = "1993", bibdate = "Thu Feb 29 17:59:11 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Article{vanderPas:1993:PIG, author = "R. {van der Pas}", title = "The {PVM} implementation of a {Generalized Red Black} algorithm", journal = j-SUPERCOMPUTER, volume = "10", number = "4-5", pages = "72--85", month = jul # "--" # sep, year = "1993", CODEN = "SPCOEL", ISSN = "0168-7875", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Convex Computer, Utrecht, Netherlands", classification = "C4240P (Parallel programming and algorithm theory); C5440 (Multiprocessor systems and techniques); C5470 (Performance evaluation and testing)", corpsource = "Convex Computer, Utrecht, Netherlands", fjournal = "Supercomputer", keywords = "Convex Meta Series; EuroBen benchmark; evaluation; Generalized Red Black algorithm; module MOD3H; Module MOD3H; multiprocessing systems; parallel algorithms; performance; performance measurements; Performance measurements; performance numbers; Performance numbers; Poisson; Poisson solver; PVM implementation; solver", pubcountry = "Netherlands", thesaurus = "Multiprocessing systems; Parallel algorithms; Performance evaluation", treatment = "P Practical", } @PhdThesis{Wilkinson:1993:IFT, author = "Timothy James Wilkinson", title = "Implementing Fault Tolerance in a 64-bit Distributed Operating System", school = "Systems Architecture Research Centre, City University", address = "London, UK", month = jul, year = "1993", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; Misc/sasos.bib", abstract = "This thesis explores the potential of 64-bit processors for providing a different style of distributed operating system. 
Rather than providing another reworking of the UNIX model, the use of the large address space for unifying volatile memory (virtual memory), persistent memory (file systems) and distributed network access is examined and a novel operating system, Arius, is proposed. The concepts behind the design of Arius are briefly reviewed, and then the reliability of such a system is examined in detail. The unified nature of the architecture makes it possible to use a reliable single address space to provide a completely reliable system without the addition of other mechanisms. Protocols are proposed to provide locally scalable distributed shared memory and these are then augmented to handle machine failures transparently through the use of distributed checkpoints and rollback. The checkpointing system makes use of the caching mechanism in DSM to provide data duplication for failure recovery. By using distributed memory for checkpoints, recovery from machine faults may be handled seamlessly. To cope with more ``complete'' failures, persistent storage is also included in the failure mechanism. These protocols are modelled to show their operability and to determine the cost they incur in various types of parallel and serial programs. Results are presented to demonstrate these costs.", } @InProceedings{Young:1993:PEN, author = "Y.-H. Young and K. Sikorski", title = "Performance evaluation of network programming environments", crossref = "Mudge:1993:PTS", pages = "106--107 (vol. 2)", year = "1993", bibdate = "Sun Dec 22 10:18:08 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput.
Sci., Utah Univ., Salt Lake City, UT, USA", classification = "C5620 (Computer networks and techniques); C5670 (Network performance); C6115 (Programming support); C6150G (Diagnostic, testing, debugging and evaluating systems)", keywords = "Benchmark tests; EXPRESS; ISIS; Jacobi iterative algorithms; Library support; LINDA; Message passing; Monte Carlo simulation; Network programming environments; Performance evaluation; PVM; Scalability; TCGMSG; TCP/IP network protocol; Token ring network; UDP/IP network protocol", thesaurus = "Computer networks; Message passing; Monte Carlo methods; Performance evaluation; Programming environments; Protocols", } @InProceedings{Zollweg:1993:OP, author = "J. A. Zollweg", title = "Overview of {PVM}", crossref = "Anonymous:1993:PSE", pages = "981--986", year = "1993", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Cornell Nat. Supercomput. Facility, NY, USA", classification = "C5640 (Protocols); C6110P (Parallel programming); C6150N (Distributed systems software); C6150N (Distributed systems); C7320 (Physics and chemistry computing); C7320 (Physics and Chemistry)", corpsource = "Cornell Nat. Supercomput. Facility, NY, USA", keywords = "frequent communication; Frequent communication; high; High performance switch; message passing; message-passing environment; Message-passing environment; parallel programming; Parallel Virtual Machine; performance switch; physics computing; protocols; PVM package; scalable POWERparallel system; Scalable POWERparallel system; scientific application; Scientific application; software packages; TCP/IP communication; virtual machines; workstations; Workstations", pubcountry = "Switzerland", thesaurus = "Message passing; Parallel programming; Physics computing; Protocols; Software packages; Virtual machines", treatment = "G General Review; P Practical", } @InProceedings{Altas:1994:NIE, author = "I. Altas and M. 
Rezny and J. Louis and K. Burrage and R. Moore and J. Belward", title = "A new image enhancement algorithm on {MasPar} and {Parallel Virtual Machine} ({PVM}) environments", crossref = "Dekker:1994:MPP", pages = "819--826", year = "1994", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Centre for Image Anal., Charles Sturt Univ., Wagga Wagga, NSW, Australia", classification = "C4170 (Differential equations); C4240P (Parallel programming and algorithm theory); C5260B (Computer vision and image processing techniques); C6110P (Parallel programming)", corpsource = "Centre for Image Anal., Charles Sturt Univ., Wagga Wagga, NSW, Australia", keywords = "computation environment; equations; fine grain; Fine grain computation environment; image enhancement; image enhancement algorithm; Image enhancement algorithm; Machine; MasPar; minimisation; optimal image enhancement; Optimal image enhancement; parallel algorithms; Parallel Virtual; Parallel Virtual Machine; partial differential; partial differential equations; Partial differential equations; processing time; Processing time; variational; Variational minimisation", pubcountry = "Netherlands", sponsororg = "AKZO NOBEL; BSO; Convex Comput.; HPCN projects; IBM; NOWESP; et al", thesaurus = "Image enhancement; Parallel algorithms; Partial differential equations", treatment = "T Theoretical or Mathematical", } @InProceedings{Alund:1994:CFD, author = "A. Alund and P. Lotstedt and R. Ryden", title = "Computational fluid dynamics on workstation clusters in industrial environments", crossref = "Dongarra:1994:PSC", pages = "1--10", year = "1994", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Swedish Inst. of Appl. 
Math., Goteborg, Sweden", classification = "C4100 (Numerical analysis); C5620L (Local area networks); C6110P (Parallel programming); C6150N (Distributed systems software); C7460 (Aerospace engineering computing)", keywords = "3D Navier--Stokes code; ABB Corporate Research; Combustion chambers; Compressible flow; Computational fluid dynamics; CRAY Y-MP processor; Ethernet; Industrial environments; Industrial production codes; Multigrid method; Numerical simulations; Parallelisation; PVM message passing system; SAAB Military Aircraft; SGI R4000 workstations; Stationary Euler equations; Stationary Navier--Stokes equations; Swedish Institute of Applied Mathematics; Turbulent flow; Volvo Flygmotor; Workstation clusters", thesaurus = "Aerospace computing; Chemically reactive flow; Combustion; Compressible flow; Engineering workstations; Flow simulation; Local area networks; Navier--Stokes equations; Numerical analysis; Parallel programming; Turbulence", } @InProceedings{Amato:1994:PEP, author = "M. Amato and A. Matrone and P. 
Schiano", title = "A practical experience in parallelizing a large {CFD} code: the {ENSOLV} flow solver", crossref = "Gentzsch:1994:HPC", pages = "508--513", year = "1994", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Centro Italiano Ricerche Aerospaziali, Capua, Italy", classification = "A4710 (General fluid dynamics theory, simulation and other computational methods); C5440 (Multiprocessing systems); C6110P (Parallel programming); C6140D (High level languages); C6150N (Distributed systems software); C7320 (Physics and chemistry computing)", keywords = "30000-Fortran-statements code; 3D Thin Layer Navier--Stokes code; Complex aerodynamic configuration; Computational fluid dynamics; Data transmission; ENSOLV flow solver; Large CFD code; Message passing; MIMD machines; Multidisciplinary group; Parallel architectures; Parallel implementation; Practical experience; PVM; Subsonic/transonic flow", thesaurus = "Aerodynamics; FORTRAN; Message passing; Navier--Stokes equations; Parallel machines; Parallel programming; Physics computing", } @InProceedings{Andersen:1994:PIA, author = "B. S. Andersen and P. Kaae and C. Keable and W. Owczarz and J. Wasniewski and Z. Zlatev", title = "{PVM} Implementations of Advection-Chemistry Modules of Air Pollution Models", crossref = "Dongarra:1994:PSC", pages = "11--16", year = "1994", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Tech. Univ. Denmark, Lyngby, Denmark", classification = "C6110P (Parallel programming); C6150G (Diagnostic, testing, debugging and evaluating systems); C6150N (Distributed systems software); C7320 (Physics and chemistry computing); C7340 (Geophysics computing)", corpsource = "Tech. Univ.
Denmark, Lyngby, Denmark", keywords = "advection-chemistry modules; Advection-chemistry modules; air pollution; air pollution models; Air pollution models; Air pollution reduction; atmospheric chemistry; chemical transformations; Chemical transformations; chemistry computing; environmental science computing; evaluation; geophysical fluid dynamics; implementations; mathematical models; Mathematical models; module testing; Module testing; parallel programming; performance; Performance; program testing; PVM; PVM implementations; PVM program; reduction; software performance; transport; Transport; virtual machines; wind; Wind", pubcountry = "Germany", sponsororg = "Danish Comput. Centre for Res. and Educ.; Inst. Math. Modelling; Tech. Univ. Denmark", thesaurus = "Air pollution; Atmospheric chemistry; Chemistry computing; Environmental science computing; Geophysical fluid dynamics; Parallel programming; Program testing; Software performance evaluation; Virtual machines; Wind", treatment = "P Practical", } @InProceedings{Anonymous:1994:ALM, author = "Anonymous", title = "Adaptive Load Migration Systems for {PVM}", crossref = "IEEE:1994:PSW", pages = "390--399", year = "1994", bibdate = "Mon Aug 26 10:38:41 MDT 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Article{Anonymous:1994:MMP, author = "Anonymous", title = "{MPI}: a message-passing interface standard", journal = j-IJSAHPC, volume = "8", number = "3/4", pages = "159--416", month = "Fall-Winter", year = "1994", CODEN = "IJSAE9", ISSN = "0890-2720", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "Message passing is a paradigm used widely on certain classes of parallel machines, especially those with distributed memory. Although there are many variations, the basic concept of processes communicating through messages is well understood.
Over the last ten years, substantial progress has been made in casting significant applications in this paradigm. Each vendor has implemented its own variant. More recently, several systems have demonstrated that a message-passing system can be efficiently and portably implemented. A definition of both the syntax and semantics of a core of library routines is thus presented. It will be useful to a wide range of users and efficiently implementable on a wide range of computers.", acknowledgement = ack-nhfb, classification = "722.2; 722.3; 722.4; 723.1; 723.1.1; C5440 (Multiprocessing systems); C6110P (Parallel programming); C6140D (High level languages); C6150N (Distributed systems software)", fjournal = "International Journal of Supercomputer Applications and High Performance Computing", keywords = "C; C (programming language); C language; Codes (standards); Collective communication; collective communication; Computational linguistics; Computer software; Conventions; conventions; Data communication systems; FORTRAN; FORTRAN (programming language); Fortran 77; Fortran bindings; Interfaces (computer); Language binding; language binding; message passing; Message passing interface; Message Passing Interface; Message-passing interface standard; message-passing interface standard; MPI environmental management; MPI function; MPI function index; MPI terms; Name-shifting convention; name-shifting convention; Parallel machines; Parallel processing systems; parallel programming; Point to point communication; Point-to-point communication; point-to-point communication; Process group collective communication operations; process group collective communication operations; Process topologies; process topologies; Profiling interface; profiling interface; Programmer; programmer; Standardization; standards; Topological structures; topological structures; Unique communication contexts; unique communication contexts; Utility functions; utility functions", thesaurus = "C language; 
FORTRAN; Message passing; Parallel programming; Standards", treatment = "P Practical", } @InProceedings{Antonuccio-Delogu:1994:PTN, author = "V. Antonuccio-Delogu and U. Becciani", title = "A parallel tree {N-body} code for heterogeneous clusters", crossref = "Dongarra:1994:PSC", pages = "17--32", year = "1994", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Catania Astrophys. Obs., Italy", classification = "C1160 (Combinatorial mathematics); C4240P (Parallel programming and algorithm theory); C5620L (Local area networks); C6110B (Software engineering techniques); C6110P (Parallel programming); C6150N (Distributed systems software); C7320 (Physics and chemistry computing); C7350 (Astronomy and astrophysics computing)", keywords = "Catania Astrophysical Observatory; Ethernet; F77 version; Generic situations; Heterogeneous clusters; Heterogeneous workstation collection; Hypercube communication pattern; Incomplete hypercube; Mini-supercomputer; Orthogonal recursive bisection oct-tree scheme; Parallel Barnes--Hut 3D N-body tree algorithm; Parallel tree N-body code; Parallelization scheme; Processing units; PVM 3.2.5; Software environment; SPMD paradigm", thesaurus = "Astronomy computing; Hypercube networks; Local area networks; Octrees; Parallel algorithms; Parallel programming; Physics computing; Software performance evaluation; Software portability; Virtual machines; Workstations", } @Article{Averbuch:1994:PES, author = "A. Averbuch and E. Gabber and S. Itzikowitz and B.
Shoham", title = "On the parallel elliptic single\slash multigrid solutions about aligned and nonaligned bodies using the {Virtual Machine for Multiprocessors}", journal = j-SCI-PROG, volume = "3", number = "1", pages = "13--32", month = "Spring", year = "1994", CODEN = "SCIPEV", ISSN = "1058-9244 (print), 1875-919X (electronic)", ISSN-L = "1058-9244", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C4170 (Differential equations); C5440 (Multiprocessing systems); C6110B (Software engineering techniques); C6110P (Parallel programming); C6150N (Distributed systems software)", corpsource = "Sch. of Math. Sci., Tel Aviv Univ., Israel", fjournal = "Scientific Programming", journal-URL = "http://iospress.metapress.com/content/1058-9244", keywords = "algorithm; aligned bodies; alignment overhead; architectures; coherent services; distributed memory multiprocessor; efficient programming; elliptic equations; explicitly parallel application programs; for Multi-Processors software package; grid points; memory multiprocessors; memory systems; message passing; MIMD; MOS; multi-user shared memory multiprocessors; multiprocessors; nonaligned bodies; parallel architectures; parallel elliptic; parallel elliptic multigrid solutions; parallel program writing; parallel programming; partial differential equations; performance; portable programming; Sequent Symmetry; shared; single grid solution; single-user shared; software packages; software portability; transputer network; transputer systems; Virtual Machine", treatment = "P Practical", } @InProceedings{Aversa:1994:PSH, author = "R. Aversa and N. Mazzocca and U. 
Villano", title = "{PS}: a simulator for heterogeneous computing environments", crossref = "Dekker:1994:MPP", pages = "335--343", year = "1994", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dipartimento di Inf. e Sistemistica, Naples Univ., Italy", classification = "C6150N (Distributed systems software)", keywords = "Distributed applications; Heterogeneous computing environment simulator; Performance analysis; Performance index accuracy; PS; PVM run-time system; PVM Simulator; Simulation environment; Simulator architecture", thesaurus = "Parallel processing; Software performance evaluation", } @InProceedings{Bachem:1994:PCT, author = "A. Bachem and W. Hochst{\"a}ttler and M. Malich", title = "Simulated Trading --- a New Parallel Approach For Solving Vehicle Routing Problems", crossref = "Joubert:1994:PCT", pages = "471--475", year = "1994", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; Techreports/ZPR.Koeln.bib", annote = "We present a parallel improvement heuristic for solving vehicle routing problems with additional constraints. The algorithm was implemented on a parallel transputer machine and on a cluster of workstations using PVM. The computational results obtained with sequential and parallel Simulated Trading show that our approach is superior compared to all heuristics known to the authors by now.", crindex = "29k,6,zpr92-125.ps.gz", } @Article{Bala:1994:IEU, author = "V. Bala and J. Bruck and R. Bryant and R. Cypher and P. 
{De Jong}", title = "The {IBM} external user interface for scalable parallel systems", journal = j-PARALLEL-COMPUTING, volume = "20", number = "4", pages = "445--??", month = apr, year = "1994", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Wed Aug 14 09:02:28 MDT 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @InProceedings{Ballico:1994:PSP, author = "M. Ballico and H. Lederer", title = "{Plasmafusionsforschung: Serielles und paralleles Rechnen mit nur einem Programmcode auf Cray YMP, nCUBE2, Workstations mit PVM und KSR1}", crossref = "Anonymous:1994:FWR", pages = "232--234", year = "1994", bibdate = "Thu Feb 29 17:59:11 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Baltas:1994:CPC, author = "N. D. Baltas and C. S. 
van den Berghe", title = "Comparison of the porting of a computational fluid dynamics application to {SIMD} and {MIMD} computers", crossref = "Dekker:1994:MPP", pages = "761--767", year = "1994", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "CHAM, London, UK", classification = "C6110B (Software engineering techniques); C6150N (Distributed systems software); C7320 (Physics and chemistry computing)", keywords = "Computational fluid dynamics; DAP; ESPRIT III project; Fortran-Plus; Maintainability; Massively parallel architectures; Massively parallel computers; Message-passing libraries; MIMD; MIMD Parsytec; Parallel Software-Hardware Application; PARIX; PARMACS; Parsytec model; PASHA; PHOENICS; Porting; Programming models; PVM; Scalable code; SIMD; SIMD DAP", thesaurus = "Message passing; Parallel architectures; Parallel programming; Physics computing; Research initiatives; Software maintenance; Software portability", } @InProceedings{Beguelin:1994:CMS, author = "A. Beguelin and B. Bruegge", title = "A configurable monitoring system for parallel programming", crossref = "IEEE:1994:PSI", pages = "206", year = "1994", bibdate = "Sun Dec 22 10:18:08 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Sch. of Comput.
Sci., Carnegie Mellon Univ., Pittsburgh, PA, USA", classification = "C0310F (Software development management); C5440 (Multiprocessor systems and techniques); C6110B (Software engineering techniques); C6110P (Parallel programming); C6150G (Diagnostic, testing, debugging and evaluating systems)", keywords = "BEE++; Configurable distributed monitoring system; Distributed programs; Heterogeneous systems; Message passing system; Parallel program debugging; Parallel programming; Parallel virtual machine; PVM", thesaurus = "Configuration management; Message passing; Parallel programming; Program debugging; System monitoring", } @Article{Beguelin:1994:HHN, author = "A. Beguelin and J. J. Dongarra and G. Al Geist and R. Manchek and K. Moore", title = "{HeNCE}: a heterogeneous network computing environment", journal = j-SCI-PROG, volume = "3", number = "1", pages = "49--60", month = "Spring", year = "1994", CODEN = "SCIPEV", ISSN = "1058-9244 (print), 1875-919X (electronic)", ISSN-L = "1058-9244", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Sch. of Comput. 
Sci., Carnegie Mellon Univ., Pittsburgh, PA, USA", classification = "C1160 (Combinatorial mathematics); C5620L (Local area networks); C6110P (Parallel programming); C6115 (Programming support); C6150C (Compilers, interpreters and other processors); C6150G (Diagnostic, testing, debugging and evaluating systems); C6150N (Distributed systems software)", fjournal = "Scientific Programming", journal-URL = "http://iospress.metapress.com/content/1058-9244", keywords = "Aggregate resources; Compilation; Data dependencies; Data formats; Debugging; Directed graphs; Execution; Graph nodes; Graphical language; HeNCE; Heterogeneous network computing environment; Integrated graphical environment; Local area network; Network computation; Networked computers; Operating systems; Parallel programs; Parallel virtual machine; Parallelism; Supercomputer performance; Tracing; Writing", thesaurus = "Directed graphs; Local area networks; Parallel programming; Parallelising compilers; Program debugging; Virtual machines", } @InProceedings{Beletsky:1994:OPV, author = "V. Beletsky and T. Popova and A. Chemeris", title = "Organization of a parallel virtual machine", crossref = "Horiguchi:1994:ISP", pages = "421--426", year = "1994", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C5220P (Parallel architecture); C5440 (Multiprocessing systems); C6150N (Distributed systems software); C7430 (Computer engineering)", corpsource = "Lab. of Parallel Comput., Acad. of Sci., Kiev, Ukraine", keywords = "compilers; dependence graph building; loop parallelization; parallel architectures; parallel machines; parallel virtual machine organization; processor; program compilers; scheduling; scheduling job programs; simulating programs; simulation; virtual machines", sponsororg = "Japan Advanced Inst. Sci. and Technol.; IEEE Comput. Soc.; IEEE Comput. Soc. Tech. Committee on Comput. Archit.; IEEE Comput. Soc. 
Tech. Committee on Parallel Process.; IPSJ Tech. Committee on Algorithms; IPSJ Tech. Committee on Comput. Archit.; IEICE Tech. Committee on Comput. Syst", treatment = "P Practical", } @MastersThesis{Biradar:1994:ADL, author = "Umesh V. Biradar", title = "Adaptive distributed load balancing model for parallel virtual machine", type = "Master of Science in Computer Science", school = "Department of Computer Science, College of Engineering, Lamar University", address = "Beaumont, TX, USA", pages = "viii + 44", year = "1994", bibdate = "Mon Jan 15 18:16:39 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, keywords = "Distributed operating systems (Computers); Electronic data processing --- Distributed processing; Multiprocessors; Parallel processing (Electronic computers)", } @TechReport{Bischof:1994:CSM, author = "Christian Bischof and {Institute for Defense Analyses}", title = "A Case study of {MPI}: portable and efficient libraries", type = "Technical report", number = "SRC-TR-94-130", institution = "Supercomputing Research Center: IDA", address = "Lanham, MD, USA", pages = "6", year = "1994", bibdate = "Sat Feb 24 09:43:12 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "``In this paper, we discuss the performance achieved by several implementations of the recently defined Message Passing Interface (MPI) standard. In particular, performance results for different implementations of the broadcast operation are analyzed and compared on the Delta, Paragon, SP1 and CM5.'' Supported in part by the Applied and Computational Mathematics Program, Advanced Research Projects Agency. Supported in part by the Office of Scientific Computing, U.S.
Department of Energy.", acknowledgement = ack-nhfb, annote = "This paper will appear in the proceedings of the Seventh SIAM conference on Parallel Processing for Scientific Computing, September 15, 1994.", keywords = "Parallel processing (Electronic computers)", } @InProceedings{Boerger:1994:FSP, author = "E. Boerger and U. Glaesser", title = "A Formal Specification of the {PVM} Architecture", crossref = "Pehrson:1994:IPP", pages = "402--409", year = "1994", bibdate = "Thu Feb 29 17:59:11 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Borger:1994:AMP, author = "E. Borger and U. Glasser", title = "An abstract model of the {Parallel Virtual Machine} ({PVM})", crossref = "Anonymous:1994:PDC", pages = "308--309", year = "1994", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C5440 (Multiprocessing systems); C6150N (Distributed systems software)", corpsource = "Dipartimento di Inf., Pisa Univ., Italy", keywords = "(computers); abstract model; abstraction level; algebraic specification; architecture; distributed memory computer; distributed memory systems; formal; heterogeneous distributed computing; message passing; operating systems; Parallel Virtual Machine; PVM; specification; user view; virtual machines", sponsororg = "Int. Soc. Comput. and Their Appl.-ISCA; IEEE; Nat. Supercomput. Centre for Energy and Environ.; Northern Telecom; CRAY Res", treatment = "P Practical", } @Article{Borger:1994:FSP, author = "E. Borger and U. 
Glasser", title = "A formal specification of the {PVM} architecture", journal = j-IFIP-TRANS-A, volume = "A-51", pages = "402--409", month = "????", year = "1994", CODEN = "ITATEC", ISSN = "0926-5473", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dipartimento di Inf., Pisa Univ., Italy", classification = "C4240P (Parallel programming and algorithm theory); C5440 (Multiprocessing systems); C5440 (Multiprocessor systems and techniques); C6110B (Software engineering techniques); C6150N (Distributed systems software); C6150N (Distributed systems)", conflocation = "Hamburg, Germany; 28 Aug.-2 Sept. 1994", conftitle = "Technology and Foundations Information Processing '94. IFIP 13th World Computer Congress", corpsource = "Dipartimento di Inf., Pisa Univ., Italy", fjournal = "IFIP Transactions. A. Computer Science and Technology", keywords = "concurrent evolving algebras; Concurrent evolving algebras; data structures; Data structures; distributed computing; distributed processing; formal specification; Formal specification; heterogeneous; Heterogeneous distributed computing; Machine; message; message passing; Message passing; parallel machines; Parallel Virtual; Parallel Virtual Machine; passing; PVM architecture; virtual machines", pubcountry = "Netherlands", thesaurus = "Distributed processing; Formal specification; Message passing; Parallel machines; Virtual machines", treatment = "P Practical", } @InProceedings{Boryczko:1994:LGA, author = "K. Boryczko and M. Bubak and J. Kitowski and J. Moscinski and R. Slota", title = "Lattice gas automata and molecular dynamics on a network of computers", crossref = "Gentzsch:1994:HPC", pages = "177--180", year = "1994", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Inst. 
Inf., Krakow, Poland", classification = "A0340G (Fluid dynamics: general mathematical aspects); A0550 (Lattice theory and statistics; Ising problems); A4710 (General fluid dynamics theory, simulation and other computational methods); C4240P (Parallel programming and algorithm theory); C5620L (Local area networks); C6110P (Parallel programming); C7320 (Physics and chemistry computing)", keywords = "Computer network; CONVEX C3210; Express; Fluid flow simulation; Lattice gas automata; Molecular dynamics; Network Linda; P4; Parallel algorithms; Parallel programs; PVM; Workstation network", thesaurus = "Automata theory; Digital simulation; Flow simulation; Hydrodynamics; Lattice gas; Local area networks; Parallel algorithms; Parallel programming; Physics computing; Workstations", } @InProceedings{Briley:1994:NNH, author = "W. R. Briley and D. S. Reese and A. Skjellum and L. H. Turcotte", title = "{NHPDCC}: The {National High Performance Distributed Computing Consortium}", crossref = "IEEE:1994:PSP", pages = "2--9", year = "1994", bibdate = "Sun Dec 22 10:18:08 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "NSF Eng. Res. Center for Comput. Field Simulation, Mississippi State Univ., MS, USA", classification = "C0200 (General computer topics); C6150N (Distributed systems software)", keywords = "Benchmarks; Consortium; High performance computing; MPI message-passing; Multi-vendor; National High Performance Distributed Computing Consortium; NHPDCC; Scalable parallel libraries; Software projects", thesaurus = "Distributed processing; Societies", } @InProceedings{Bubak:1994:EMD, author = "M. Bubak and J. Moscinski and M. Pogoda and W.
Zdechlikiewicz", title = "Efficient molecular dynamics simulation on networked workstations", crossref = "Gruber:1994:PJE", pages = "191--194", year = "1994", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Inst. of Comput. Sci, Cracow, Poland", classification = "A0260 (Numerical approximation and analysis); A6120J (Computer simulation of static and dynamic liquid behaviour); C4240C (Computational complexity); C4240P (Parallel programming and algorithm theory); C6110P (Parallel programming); C6150N (Distributed systems software); C6185 (Simulation techniques); C7320 (Physics and chemistry computing)", keywords = "Computational complexity; Efficient molecular dynamics simulation; Large particle numbers; Link lists; Neighbor lists; Networked workstations; Parallel 2D molecular dynamics program; Parallel MD algorithm; PVM programming environment; Sequential 2D molecular dynamics program", thesaurus = "Computational complexity; Digital simulation; List processing; Local area networks; Molecular dynamics method; Parallel algorithms; Parallel programming; Physics computing; Workstations", } @InProceedings{Bubak:1994:FLG, author = "M. Bubak and J. Moscinski and R. Slota", title = "{FHP} lattice gas on networked workstations", crossref = "Gruber:1994:PJE", pages = "427--430", year = "1994", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Inst. 
of Control Sci., Cracow, Poland", classification = "A0550 (Lattice theory and statistics; Ising problems); A4710 (General fluid dynamics theory, simulation and other computational methods); C4240P (Parallel programming and algorithm theory); C6110P (Parallel programming); C6150N (Distributed systems software); C6185 (Simulation techniques); C7320 (Physics and chemistry computing)", keywords = "Computer power; Domain decomposition; Dynamic load balancing; FHP lattice gas; Fluid flows; Lattice gas simulation; Networked workstations; Normal load; Parallel algorithm; Parallel distributed program; PVM", thesaurus = "Digital simulation; Flow simulation; Fluid dynamics; Lattice gas; Local area networks; Operating systems [computers]; Parallel algorithms; Parallel programming; Physics computing; Resource allocation; Workstations", } @InProceedings{Bubak:1994:IPL, author = "M. Bubak and J. Moscinski and R. Slota", title = "Implementation of Parallel Lattice Gas Program on Workstations under {PVM}", crossref = "Dongarra:1994:PSC", pages = "136--146", year = "1994", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Inst. of Comput. Sci, Akademia Gorniczo-Hutnicza, Cracow, Poland", classification = "A0270 (Computational techniques); A0545 (Theory and models of chaotic systems); A0550 (Lattice theory and statistics; Ising problems); A4710 (General fluid dynamics theory, simulation and other computational methods); C4220 (Automata theory); C4240P (Parallel programming and algorithm theory); C6110P (Parallel programming); C7320 (Physics and chemistry computing)", conflocation = "Lyngby, Denmark; 20-23 June 1994", conftitle = "Proceedings of Workshop on Parallel Scientific Computing", corpsource = "Inst. of Comput.
Sci, Akademia Gorniczo-Hutnicza, Cracow, Poland", keywords = "algorithms; balancing; cellular automata; domain decomposition; Domain decomposition; dynamic load; Dynamic load balancing; flow simulation; fluid flow simulation; Fluid flow simulation; lattice gas; lattice gas automata program; Lattice gas automata program; parallel; parallel lattice gas program; Parallel lattice gas program; physics computing", pubcountry = "Germany", sponsororg = "Danish Comput. Centre for Res. and Educ.; Inst. Math. Modelling; Tech. Univ. Denmark", thesaurus = "Cellular automata; Flow simulation; Lattice gas; Parallel algorithms; Physics computing", treatment = "T Theoretical or Mathematical", } @InProceedings{Bubak:1994:PDS, author = "M. Bubak and J. Moscinski and M. Pogoda and W. Zdechlikiewicz", title = "Parallel distributed {2-D} short-range molecular dynamics on networked workstations", crossref = "Dongarra:1994:PSC", pages = "127--135", year = "1994", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Inst. of Comput. Sci, Akademia Gorniczo-Hutnicza, Cracow, Poland", classification = "A0260 (Numerical approximation and analysis); A0270 (Computational techniques); A6120J (Computer simulation of static and dynamic liquid behaviour); C4240C (Computational complexity); C4240P (Parallel programming and algorithm theory); C6110P (Parallel programming); C7320 (Physics and chemistry computing)", keywords = "Computational complexity; Execution time; Lennard-Jones systems; Link lists; Memory requirements; Neighbour lists; Networked workstations; Parallel algorithm; Parallel distributed 2-D short-range molecular dynamics; PVM programming environment", thesaurus = "Computational complexity; Lennard-Jones potential; Molecular dynamics method; Parallel algorithms; Physics computing", } @InProceedings{Burrer:1994:RRB, author = "C. Burrer and P.
Remy", title = "{RUBIS}: a runtime basic interface software on {TELMAT T9000 TN} series", crossref = "deGloria:1994:TAS", pages = "63--78", year = "1994", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "TELMAT MULTINODE, Soultz, France", classification = "C6110P (Parallel programming); C6150C (Compilers, interpreters and other processors); C6150G (Diagnostic, testing, debugging and evaluating systems); C6150J (Operating systems); C6150N (Distributed systems software)", keywords = "Code portability; Compiling; Configuration; Functional flexibility; INMOS TOOLSET environment; Local resources management; Micro-kernel; MPI prototype; Object abstractions; Parallel programs; Parallel transputer machines; PVM subsystem; RUBIS; Runtime basic interface software; T.Paragraph post-mortem tool; TELMAT T9000 TN series", thesaurus = "Operating system kernels; Parallel programming; Parallelising compilers; Program diagnostics; Software performance evaluation; Software portability; Transputer systems", } @InProceedings{Campanai:1994:EAS, author = "M. Campanai and O. Morales and S. Viti and R. Trotta and P. Viliani and M. 
{Lo Moro}", title = "Experiences assessing software testing activities: the adoption of {PVM}, a prediction and validation model", crossref = "Anonymous:1994:SQC", pages = "491--500", year = "1994", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C0310F (Software development management); C6110S (Software metrics); C6150G (Diagnostic, testing, debugging and evaluating systems)", corpsource = "CESVIT/CQ ware, Florence, Italy", keywords = "activity optimization; improvement path; management; managers; prediction and; program testing; program verification; project leaders; PVM; software; software development; software metrics; software system; software testing; software testing activity assessment; spatial applications; system monitoring; technicians; telecommunications; validation model", pubcountry = "Switzerland", treatment = "P Practical", } @InProceedings{Casas:1994:ALM, author = "J. Casas and R. Konuru and S. W. Otto and R. Prouty and J. Walpole", title = "Adaptive load migration systems for {PVM}", crossref = "IEEE:1994:PSW", pages = "390--399", year = "1994", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://sc94.ameslab.gov/AP/contents.html", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. Sci. and Eng., Oregon Graduate Inst. of Sci. and Technol., Portland, OR, USA", classification = "C6150N (Distributed systems software)", conflocation = "Washington, DC, USA; 14-18 Nov. 1994", conftitle = "Proceedings of Supercomputing '94", corpsource = "Dept. of Comput. Sci. and Eng., Oregon Graduate Inst. of Sci. 
and Technol., Portland, OR, USA", keywords = "adaptive data movement; Adaptive data movement; adaptive load distribution; Adaptive load distribution; adaptive load migration systems; Adaptive load migration systems; ADM; allocation; effectiveness; Effectiveness; heterogeneous workstation network; message passing; message passing system; Message passing system; migratable PVM; Migratable PVM; MPVM; parallel algorithms; parallel applications; Parallel applications; performance; Performance; programming; programming methodology; Programming methodology; resource; shared; Shared heterogeneous workstation network; transparent migration; Transparent migration; Unix; Unix process; UPVM; usability; Usability; user-level PVM; User-level PVM; virtual machines; virtual processors; Virtual processors; workstation environment changes; Workstation environment changes", sponsororg = "IEEE Comput. Soc.; ACM; SIAM", thesaurus = "Message passing; Parallel algorithms; Programming; Resource allocation; Unix; Virtual machines", treatment = "P Practical", } @InProceedings{Castagnera:1994:NEP, author = "K. Castagnera and D. Cheng and R. Fatoohi and E. Hook and B. Kramer and C. Manning and J. Musch and C. Niggley and W. Saphir and D. Sheppard and M. Smith and I. Stockdale and S. Welch and R. Williams and D. Yip", title = "{NAS} experiences with a prototype cluster of workstations", crossref = "IEEE:1994:PSW", pages = "410--419", year = "1994", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "NAS Syst. Div., NASA Ames Res. 
Center, Moffett Field, CA, USA", classification = "C5430 (Microcomputers); C5470 (Performance evaluation and testing); C5620 (Computer networks and techniques); C6150N (Distributed systems software)", keywords = "Aeroscience problems; Computational fluid dynamics; Cycle recovery; Loosely coupled cluster; NAS; NAS Parallel Benchmarks; OVERFLOW-PVM; Performance evaluation; Primary system users; Prototype cluster; Silicon Graphics; System management issues; Workstation cluster", thesaurus = "Distributed processing; Fluid dynamics; Message passing; Performance evaluation; Physics computing; Workstations", } @InProceedings{Cheng:1994:PDP, author = "D. Cheng and R. Hood", title = "A portable debugger for parallel and distributed programs", crossref = "IEEE:1994:PSW", pages = "723--732", year = "1994", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Comput. Sci. Corp., NASA Ames Res. Center, Moffett Field, CA, USA", classification = "C5620L (Local area networks); C6110P (Parallel programming); C6150G (Diagnostic, testing, debugging and evaluating systems); C6150N (Distributed systems software)", keywords = "Client portability; Client server interaction; Client server model; Debugger code; Distributed programs; Message passing implementations; Message passing library; Message passing programs; MPI programs; Parallel programs; Portable debugger; Process abstractions; PVM; Server components; Tool generated code; User interface", thesaurus = "Client-server systems; Message passing; Parallel programming; Program debugging; Software portability", } @Misc{Choudhary:1994:LCR, author = "Alok Choudhary and Ian Foster and Geoffrey Fox and Ken Kennedy and Carl Kesselman and Charles Koelbel and Joel Saltz and Marc Snir", title = "Languages, Compilers, and Runtime Systems Support for Parallel Input-Output", year = "1994", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; 
Parallel/Parallel.io.bib", note = "Scalable I/O Initiative Working Paper Number 3. On WWW at \path=http://www.ccsf.caltech.edu/SIO/SIO.html=.", URL = "http://www.ccsf.caltech.edu/SIO/SIO.html", comment = "Language extensions to support parallel I/O. Compiler optimizations. Runtime library to support the compiler and interface with the native file system. Compiler would develop a mapping of data to the processor memories and to the disks, and then decide on I/O schedules to move data around, overlap I/O with computation, even move computation around to best fit what is available in memory at a given time. It can also help with checkpointing. Compiler should pass info to the runtime system, which in turn may need to pass info to the file system, to help with optimization. I/O scheduling includes reordering accesses; they even go so far as to propose doing seek optimization in the runtime library. Support for collective I/O. Extension of MPI to I/O, to take advantage of its support for asynchrony, scatter-gather, {\em etc}. On the way, they hope to work with the FS people to decide on the functional requirements of the file system. See also poole:sio-survey, bagrodia:sio-character, bershad:sio-os.", keyword = "parallel I/O, multiprocessor file system, pario bib", } @InProceedings{Clarke:1994:MMP, author = "L. Clarke and I. Glendinning and R. Hempel", title = "The {MPI Message Passing Interface Standard}", crossref = "Decker:1994:PEM", pages = "213--218", year = "1994", bibdate = "Thu Feb 29 17:59:11 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Coelho:1994:EHC, author = "F. 
Coelho", title = "Experiments with {HPF} compilation for a network of workstations", crossref = "Gentzsch:1994:HPC", pages = "423--428", year = "1994", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Centre de Recherche en Inf., Ecole des Mines de Paris, Fontainebleau, France", classification = "C6110P (Parallel programming); C6140D (High level languages); C6150C (Compilers, interpreters and other processors)", keywords = "Communication hardware; Data-parallel Fortran; Distributed memory multiprocessors; High Performance Fortran; HPF compilation; Optimizing compiler; PVM 3-based generated code; Scalable performance; Workstation network", thesaurus = "FORTRAN; Optimising compilers; Parallel languages; Parallelising compilers", } @Article{Cooper:1994:CHF, author = "M. D. Cooper and N. A. Burton and R. J. Hall and I. H. Hillier", title = "Combined {Hartree--Fock} and density functional theory: a distributed memory parallel implementation", journal = j-J-MOL-STRUCT-THEOCHEM, volume = "121", pages = "97--107", month = dec, year = "1994", CODEN = "THEODJ", ISSN = "0166-1280 (print), 1872-7999 (electronic)", ISSN-L = "0166-1280", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Chem., Manchester Univ., UK", classification = "A3115 (General mathematical and computational developments for atoms and molecules); A3120J (Local density approximation (atoms and molecules)); C6110P (Parallel programming); C7320 (Physics and chemistry computing)", fjournal = "Journal of molecular structure. 
Theochem", keywords = "Ab initio code; Density functional theory; Direct SCF energy evaluation; Distributed memory parallel implementation; GAUSSIAN 92; Gradient evaluation; Hartree--Fock theory; Hewlett--Packard 9000-7xx series; Kohn--Sham density functional code; Parallel Virtual Machine; Parallelisation; Portable communications package; PRISM algorithm; PVM; Two-electron integrals; UNIX workstations; Workstation cluster", pubcountry = "Netherlands", thesaurus = "Ab initio calculations; Density functional theory; Distributed memory systems; HF calculations; Parallel algorithms; Parallel programming; Physics computing; SCF calculations", } @InProceedings{Cote:1994:PSA, author = "J. Cote and S. J. Thomas", title = "Parallel Semi-{Lagrangian} Advection on the Sphere Using {PVM}", crossref = "Pierce:1994:PSH", pages = "470--477", year = "1994", bibdate = "Mon Oct 26 07:49:42 1998", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Div. de Recherche en Prevision Numerique, Environment Canada, Dorval, Que., Canada", classification = "C1160 (Combinatorial mathematics); C4240P (Parallel programming and algorithm theory); C4260 (Computational geometry); C5220P (Parallel architecture); C5440 (Multiprocessor systems and techniques); C6150N (Distributed systems)", keywords = "Courant-Friedrichs-Lewy condition; Distributed MIMD parallel algorithms; Eulerian methods; Intel iPSC/860; Numerical methods; Parallel message-passing implementation; Parallel performance; Parallel semi-Lagrangian advection; Parallel virtual machine; PVM; Semi-Lagrangian method; Shallow-water equations; Sphere; Spherical geometry; Sub-grid dimensions", thesaurus = "Computational geometry; Hypercube networks; Message passing; Parallel algorithms; Parallel machines", } @InProceedings{Cote:1994:PSL, author = "J. Cote and S. J. 
Thomas", title = "Parallel {semi-Lagrangian} advection on the sphere using {PVM}", crossref = "Dekker:1994:MPP", pages = "801--808", year = "1994", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "A0260 (Numerical approximation and analysis); A0340G (Fluid dynamics: general mathematical aspects); A4710 (General fluid dynamics theory, simulation and other computational methods); C1160 (Combinatorial mathematics); C4160 (Numerical integration and differentiation); C4240P (Parallel programming and algorithm theory); C4260 (Computational geometry); C5220P (Parallel architecture); C5440 (Multiprocessing systems); C5440 (Multiprocessor systems and techniques); C6150N (Distributed systems software); C6150N (Distributed systems); C7320 (Physics and chemistry computing)", corpsource = "Div. de Recherche en Prevision Numerique, Environment Canada, Dorval, Que., Canada", keywords = "algorithms; computational fluid dynamics; computational geometry; Courant Friedrichs Lewy condition; Courant-Friedrichs-Lewy condition; dimensions; distributed memory systems; distributed MIMD implementation; distributed MIMD parallel algorithms; Eulerian methods; evaluation; fluid dynamics; hypercube networks; integration; Intel iPSC/860; Lagrangian method; message passing; methods; numerical; parallel; parallel algorithms; parallel machines; parallel message-passing implementation; parallel semi-Lagrangian advection; passive advection; performance; physics computing; problem; processor; PVM; scalable code; semi-; shallow-water equations; software performance; sphere; spherical geometry; sub-grid; sub-grid dimensions; time steps; transport processes; virtual machine", sponsororg = "IEEE Comput. Soc. Tech. Committee on Supercomput. Appl", treatment = "P Practical", xxauthor = "S. J. Thomas and J. Cote", } @InProceedings{Cownie:1994:PPP, author = "J. Cownie and A. Dunlop and S. Hellberg and A. J. G. 
Hey and D. Pritchard", title = "Portable parallel programming environments-the {ESPRIT PPPE} project", crossref = "Dekker:1994:MPP", pages = "135--142", year = "1994", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Meiko Ltd., Bristol, UK", classification = "C6110B (Software engineering techniques); C6110P (Parallel programming); C6115 (Programming support)", keywords = "ESPRIT PPPE project; European hardware manufacturers; HPF mapper; Integrated tool environments; Large distributed memory parallel computers; Large scale scientific and engineering applications; Mainstream applications development; Migration aids; Open system standards; Parallel architectures; Parallel debugger; PARMACS/MPI; PCTE; Performance monitor; Portable parallel programming environments; Program debugger; Run-time environment; Software houses", thesaurus = "Parallel architectures; Parallel programming; Programming environments; Research initiatives; Software houses; Software portability; Standards", } @Article{daCunha:1994:PIR, author = "Rudnei Dias {da Cunha} and Tim Hopkins", title = "A parallel implementation of the restarted {GMRES} iterative algorithm for nonsymmetric systems of linear equations", journal = j-ADV-COMPUT-MATH, volume = "2", number = "3", pages = "261--277", month = "????", year = "1994", CODEN = "ACMHEX", ISSN = "1019-7168", ISSN-L = "1019-7168", bibdate = "Mon Oct 07 09:09:23 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "We describe the parallelisation of the GMRES$ (c) $ algorithm and its implementation on distributed-memory architectures, using both networks of transputers and networks of workstations under the PVM message-passing system. The test systems of linear equations considered are those derived from five-point finite-difference discretisations of partial differential equations. 
A theoretical model of the computation and communication phases is presented which allows us to decide for which values of the parameter $c$ our implementation executes efficiently. The results show that for reasonably large discretisation grids the implementations are effective on a large number of processors.", acknowledgement = ack-nhfb, affiliation = "Centro de Processamento de Dados, Univ. Federal do Rio Grande do Sul, Brazil", classification = "C4130 (Interpolation and function approximation); C4170 (Differential equations); C4240P (Parallel programming and algorithm theory); C5440 (Multiprocessor systems and techniques)", fjournal = "Advances in computational mathematics", keywords = "(65F10) Numerical analysis; (65Y05) Numerical analysis; Communication phases; Computer aspects of numerical algorithms; Distributed-memory architectures; Five-point finite-difference discretisations; Iterative methods for linear systems (See also 65N22); Networks of transputers; Networks of workstations; Nonsymmetric systems of linear equations; Numerical linear algebra; Parallel computation; Parallel implementation; Partial differential equations; PVM message-passing system; Restarted GMRES iterative algorithm", pubcountry = "Switzerland", thesaurus = "Distributed memory systems; Finite difference methods; Iterative methods; Message passing; Parallel algorithms; Partial differential equations", } @InProceedings{Damodaran-Kamal:1994:MSR, author = "S. K. Damodaran-Kamal and J. M. Francioni", title = "mdb: a semantic race detection tool for {PVM}", crossref = "Pierce:1994:PSH", pages = "702--709", year = "1994", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. Sci., Southwestern Louisiana Univ., Lafayette, LA, USA", classification = "C6110P (Parallel programming); C6150G (Diagnostic, testing, debugging and evaluating systems)", corpsource = "Dept. of Comput.
Sci., Southwestern Louisiana Univ., Lafayette, LA, USA", keywords = "C; C programs; debugging tool; Debugging tool; detection; deterministic replay; Deterministic replay; erroneous executions; Erroneous executions; error; error detection; Error detection; expressions; Fortran programs; hazards and race conditions; mdb; Mdb; message; message passing parallel programs; Message passing parallel programs; nondeterminism; Nondeterminism; parallel programming; passing; program debugging; programs; PVM; receive operation; Receive operation; run-time detection; Run-time detection; semantic; Semantic expressions; semantic race detection tool; Semantic race detection tool; sequential debugger invocation; Sequential debugger invocation", sponsororg = "IEEE Comput. Soc. Tech. Committee on Supercomput. Appl", thesaurus = "Error detection; Hazards and race conditions; Message passing; Parallel programming; Program debugging", treatment = "P Practical", } @InProceedings{Damodaran-Kamal:1994:TRP, author = "S. K. Damodaran-Kamal and J. M. Francioni", title = "Testing races in parallel programs with an {OtOt} strategy", crossref = "Ostrand:1994:PIS", journal = j-SIGSOFT, pages = "216--227", year = "1994", CODEN = "SFENDP", ISSN = "0163-5948", ISSN-L = "0163-5948", bibdate = "Sun Dec 22 10:18:08 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", acknowledgement = ack-nhfb, affiliation = "Department of Comput. Sci., Southwestern Louisiana Univ., Lafayette, LA, USA", classification = "C4240P (Parallel programming and algorithm theory); C6110P (Parallel programming); C6150G (Diagnostic, testing, debugging and evaluating systems); C7430 (Computer engineering)", fjournal = "ACM SIGSOFT Software Engineering Notes", issue = "spec. issue. p.
216-27", journal-URL = "https://dl.acm.org/citation.cfm?id=J728", keywords = "Concurrent programs; Controlled execution; Debugging tool; Exponential complexity; General-purpose run-time testing technique; Mdb; Nondeterminism; One-thread-at-one-time strategy; OtOt strategy; Parallel Virtual Machine; Polynomial time complexity; Race conditions specification; Race detection; Race expressions; Race testing; Unrestricted message passing parallel programs", thesaurus = "Computational complexity; Hazards and race conditions; Message passing; Parallel programming; Program debugging; Program testing; Virtual machines", } @Article{Dean:1994:CPV, author = "C. E. Dean and R. C. Denny and P. C. Stephenson and G. J. Milne and E. Pantos", title = "Computing with parallel virtual machines", journal = j-J-PHYS-IV-COLLOQUE, volume = "4", number = "C9", pages = "C9/445--448", month = nov, year = "1994", CODEN = "JPICEI", ISSN = "1155-4339", ISSN-L = "1155-4339", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", acknowledgement = ack-nhfb, affiliation = "SERC Daresbury Lab., Warrington, UK", classification = "C4240P (Parallel programming and algorithm theory); C6110P (Parallel programming); C7320 (Physics and chemistry computing)", fjournal = "Journal de physique. IV, Colloque", keywords = "64-Node Intel iPSC/860 hypercube; Computing elements; CPU performance; DALAI; LSQINT; Parallel execution; Parallel virtual machines; PATTERN; Processing time; PROJECT; Single program multiple data; Synchrotron radiation", thesaurus = "Parallel programming; Physics computing; Synchrotron radiation", } @Article{DeKeyser:1994:RTL, author = "J. DeKeyser and K. Lust and D. 
Roose", title = "Run-time load balancing support for a parallel multiblock {Euler\slash Navier--Stokes} code with adaptive refinement on distributed memory computers", journal = j-PARALLEL-COMPUTING, volume = "20", number = "8", pages = "1069--1088", month = aug, year = "1994", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Sun Dec 22 10:18:08 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. Sci., Katholieke Univ., Leuven, Belgium", classification = "A4710 (General fluid dynamics theory, simulation and other computational methods); C4185 (Finite element analysis); C4240P (Parallel programming and algorithm theory); C6150N (Distributed systems software)", fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", keywords = "Adaptive mesh refinement; Distributed memory computers; Euler/Navier--Stokes code; Load balance; Load balancing; Mesh refinement; Parallel algorithm; Parallel performance; Parallel programming library", pubcountry = "Netherlands", thesaurus = "Distributed memory systems; Finite element analysis; Fluid dynamics; Navier--Stokes equations; Parallel algorithms; Resource allocation", } @InProceedings{DeRoeck:1994:CFP, author = "Y. H. {De Roeck} and R. E. 
Plessix", title = "Combining {F90} and {PVM} to Construct Synthetic Seismograms by Ray-Tracing", crossref = "IEEE:1994:OOE", volume = "2", pages = "II-653--II-658", year = "1994", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "IFREMER, Brest, France", classification = "A9130R (Controlled source seismology); A9365 (Data and information: acquisition, processing, storage and dissemination in geophysics); A9385 (Instrumentation and techniques for geophysical, hydrospheric and lower atmosphere research); B6140C (Optical information, image and video signal processing); B7710 (Geophysical techniques and equipment); C5260B (Computer vision and image processing techniques); C7340 (Geophysics computing)", conflocation = "Brest, France; 13-16 Sept. 1994", conftitle = "Proceedings of OCEANS'94", corpsource = "IFREMER, Brest, France", keywords = "Energy attenuation; energy attenuation; Explosion seismology; explosion seismology; F90; geophysical prospecting; geophysical signal processing; geophysical techniques; inverse problems; Marine reflection seismic record; marine reflection seismic record; Measurement technique; measurement technique; Modell; modell; Multiple echo; multiple echo; Oceanic crust; oceanic crust; Parallel Virtual Machine; Parallelisation; parallelisation; profiling; Prospecting; prospecting; PVM; Ray-conversion; ray-conversion; Ray-tracing; ray-tracing; Seafloor; seafloor; seismic reflection; Seismic reflection profiling; seismology; Synthetic seismogram; synthetic seismogram; Vectorisation; vectorisation", sponsororg = "Oceanic Eng. Soc. IEEE; Soc. Electr. Electron.
France; Communaute Urbaine de Brest", thesaurus = "Geophysical prospecting; Geophysical signal processing; Geophysical techniques; Inverse problems; Seismology", treatment = "P Practical; T Theoretical or Mathematical", } @Article{Deshpande:1994:ADN, author = "Manish Deshpande and Jinzhang Feng and Charles L. Merkle and Ashish Deshpande", title = "Application of a Distributed Network in Computational Fluid Dynamic Simulations", journal = j-IJSA, volume = "8", number = "1", pages = "64--67", month = "Spring", year = "1994", CODEN = "IJSAE9", ISSN = "0890-2720", bibdate = "Tue Feb 18 09:47:23 MST 1997", bibsource = "Compendex database; https://www.math.utah.edu/pub/tex/bib/pvm.bib; UnCover library database", abstract = "A general-purpose 3-D, incompressible Navier--Stokes algorithm is implemented on a network of concurrently operating workstations using PVM and compared with its performance on a CRAY Y-MP and on an Intel iPSC\slash 860. The problem is relatively computationally intensive, and has a communication structure based primarily on nearest-neighbor communication, making it ideally suited to message passing. Such problems are frequently encountered in CFD, and their solution is increasingly in demand. The communication structure is explicitly coded in the implementation to fully exploit the regularity in message passing in order to produce a near-optimal solution. Results are presented for various grid sizes using up to eight processors.", acknowledgement = ack-nhfb, affiliation = "Dept. of Mech. 
Eng., Pennsylvania State Univ.", affiliationaddress = "University Park, PA, USA", classification = "631.1.1; 721.1; 722.4; 723.1; 723.2; 723.5", fjournal = "International Journal of Supercomputer Applications", journalabr = "Int J Supercomput Appl High Perform Comput", keywords = "Algorithms; Communication structure; Computational complexity; Computational fluid dynamic simulations; Computer simulation; Computer workstations; Concurrent operations; Data structures; Data transfer; Distributed computer systems; Fluid dynamics; Incompressible Navier--Stokes algorithm; Nearest neighbor communication; Optimization; Three dimensional", } @InProceedings{Dikken:1994:DDL, author = "L. Dikken and F. van der Linden and J. Vesseur and P. Sloot", title = "{DynamicPVM}: {Dynamic} load balancing on parallel systems", crossref = "Gentzsch:1994:HPC", pages = "273--277", year = "1994", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Shell Nederland Informatieverwerking, Rijswijk, Netherlands", classification = "C5440 (Multiprocessing systems); C6110P (Parallel programming); C6150J (Operating systems); C6150N (Distributed systems software)", keywords = "DynamicPVM; Load balancing; Loosely coupled processors; Migration; Multi tasking; Multiuser; Parallel systems; Parallel Virtual Machine; Process checkpointing; PVM; Restart mechanism; Scheduling", thesaurus = "Message passing; Parallel programming; Processor scheduling; Resource allocation; Virtual machines", } @InProceedings{Dykes:1994:CCP, author = "S. G. 
Dykes and Xiaodong Zhang and Yan Zhou and Haixu Yang", title = "Communication and computation patterns of large scale image convolutions on parallel architectures", crossref = "Siegal:1994:PEI", pages = "926--931", year = "1994", bibdate = "Sun Dec 22 10:18:08 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "High Performance Comput. and Software Lab., Texas Univ., San Antonio, TX, USA", classification = "C4240P (Parallel programming and algorithm theory); C5220P (Parallel architecture); C5260B (Computer vision and picture processing); C5440 (Multiprocessor systems and techniques)", keywords = "CM-5; Communication overhead; Computation patterns; Convolution; Convolution calculations; Execution time; Fast memory store; Image processing operations; Image segmentation; IPSC/860; Large kernel convolutions; Large scale image convolutions; Memory access demand; Parallel algorithms; Parallel architectures; Processor power; PVM distributed memory multicomputers; Texture segmentation application", thesaurus = "Distributed memory systems; Image segmentation; Image texture; Parallel algorithms; Parallel machines", } @InProceedings{Elamvazuthi:1994:OPA, author = "C. Elamvazuthi and G. A. Manson", title = "{Occam}, {PVM} and the Alternative Construct", crossref = "Miles:1994:PTO", pages = "56--68", year = "1994", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. Sci., Sheffield Univ., UK", classification = "C6110P (Parallel programming); C6115 (Programming support); C6140D (High level languages); C6150N (Distributed systems software)", corpsource = "Dept. of Comput.
Sci., Sheffield Univ., UK", keywords = "alternation construct; Alternation construct; code; Code generation; code mapping; Code mapping; Communicating State Diagram; communication event; Communication event; computer aided software engineering; CSD; diagrammatic representation; Diagrammatic representation; diagrammatic technique; Diagrammatic technique; generation; Machine; methodology; Methodology; model; Model process behaviour; Occam; occam programming language; Occam programming language; parallel; parallel architecture; Parallel architecture; Parallel CASE tool; parallel processing; Parallel processing; Parallel system design; Parallel Virtual; Parallel Virtual Machine; process behaviour; programming; PVM; software tool; Software tool; software tools; system design; virtual machines", pubcountry = "Netherlands", thesaurus = "Computer aided software engineering; Occam; Parallel programming; Software tools; Virtual machines", treatment = "P Practical; T Theoretical or Mathematical", } @Article{Eppstein:1994:CSP, author = "M. J. Eppstein and D. E. Dougherty", title = "A comparative study of {PVM} workstation cluster implementations of a two-phase subsurface flow model", journal = j-ADV-WATER-RESOURCES, volume = "17", number = "3", pages = "181--??", month = "????", year = "1994", CODEN = "AWREDI", ISSN = "0309-1708 (print), 1872-9657 (electronic)", ISSN-L = "0309-1708", bibdate = "Mon Jan 15 15:32:53 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Advances in Water Resources", } @Article{Escaig:1994:PMD, author = "Y. Escaig and G. Touzot and M. 
Vayssade", title = "Parallelization of a multilevel domain decomposition method", journal = j-COMPUT-SYST-ENG, volume = "5", number = "3", pages = "253--263", month = jun, year = "1994", CODEN = "COSEEO", ISSN = "0956-0521", ISSN-L = "0956-0521", bibdate = "Sun Dec 22 10:18:08 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "CRIHAN, Mont Saint Aignan, France", classification = "C4185 (Finite element analysis); C5220P (Parallel architecture); C5440 (Multiprocessing systems); C5470 (Performance evaluation and testing); C7310 (Mathematics computing)", fjournal = "Computing systems in engineering: an international journal", keywords = "CRAY Y-MP; Distributed memory machines; Distributed memory systems; Ethernet network; Finite element method; IBM RS/6000 workstations; Interface problem; MIMD; Multilevel domain decomposition method; Multiple instructions multiple data; Multiprocessor machines; Parallel Virtual Machine; Performance; PVM; Shared memory machine; Shared memory systems", pubcountry = "UK", thesaurus = "Cray computers; Distributed memory systems; Finite element analysis; IBM computers; Mathematics computing; Performance evaluation; Shared memory systems", } @Article{Ewing:1994:DCW, author = "Richard E. Ewing and Robert C. Sharpley and Derek Mitchum and P. O'Leary and J. S. 
Sochacki", title = "Distributed Computation of Wave Propagation Models Using {PVM}", journal = j-IEEE-PAR-DIST-TECH, volume = "2", number = "1", pages = "26--31", month = "Spring", year = "1994", CODEN = "IPDTEX", DOI = "https://doi.org/10.1109/88.281870", ISSN = "1063-6552 (print), 1558-1861 (electronic)", ISSN-L = "1063-6552", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; UnCover library database", abstract = "The Parallel Virtual Machine lets researchers create a powerful, inexpensive parallel system on which they can solve large, sophisticated problems such as simulating the propagation of seismic waves.", acknowledgement = ack-nhfb, affiliation = "Texas A and M Univ., College Station, TX, USA", classification = "A9130F (Surface and body waves); C5440 (Multiprocessor systems and techniques); C5620 (Computer networks and techniques); C6110P (Parallel programming); C6150N (Distributed systems); C7340 (Geophysics)", corpsource = "Texas A and M Univ., College Station, TX, USA", fjournal = "IEEE parallel and distributed technology: systems and applications", keywords = "C; Communication networks; communication networks; Computational power; computational power; Cost effectiveness; cost effectiveness; Distributed computation; distributed computation; Ethernet; Fiber; fiber; Fortran; geophysics computing; Hypercubes; hypercubes; machines; Meshes; meshes; network operating systems; parallel; parallel programming; Parallel Virtual Machine; PVM; Remote procedural libraries; remote procedural libraries; Rings; rings; Seismic wave propagation simulation; seismic wave propagation simulation; seismic waves; Wave propagation models; wave propagation models", thesaurus = "Geophysics computing; Network operating systems; Parallel machines; Parallel programming; Seismic waves", treatment = "P Practical", } @Article{Femminella:1994:PBP, author = "A. Femminella and A.
Omodeo", title = "{PVM-based} parallel computing: a case study on power plant simulation", journal = j-MICROPROC-MICROPROG, volume = "40", number = "10--12", pages = "875--878", month = dec, year = "1994", CODEN = "MMICDT", ISSN = "0165-6074 (print), 1878-7061 (electronic)", ISSN-L = "0165-6074", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Centro Ricerca di Autom., ENEL, Milan, Italy", classification = "B8110D (Power system planning and layout); B8200 (Generating stations and plants); C6110P (Parallel programming); C6150N (Distributed systems software); C7410B (Power engineering computing)", conflocation = "Liverpool, UK; Sept. 1994", conftitle = "20th Annual Euromicro Conference. System Architecture and Integration", corpsource = "Centro Ricerca di Autom., ENEL, Milan, Italy", fjournal = "Microprocessing and Microprogramming", keywords = "case study; Case study; digital simulation; distributed programming; Distributed programming; distributed software platform; Distributed software platform; heterogeneous workstation; Heterogeneous workstation network; independently evolving; Independently evolving processes; machines; message exchange; Message exchange; network; open systems; parallel programming; Parallel Virtual Machine; periodic synchronization; Periodic synchronization; power plant; Power plant simulation; power plants; power system analysis computing; processes; PVM-based parallel computing; simulation; Transputer network; virtual; weakly-coupled processes; Weakly-coupled processes", pubcountry = "Netherlands", thesaurus = "Digital simulation; Open systems; Parallel programming; Power plants; Power system analysis computing; Virtual machines", treatment = "P Practical", } @InProceedings{Fineberg:1994:IMM, author = "S. A. 
Fineberg", title = "Implementing multidisciplinary and multi-zonal applications using {MPI}", crossref = "IEEE:1994:FSF", pages = "496--503", year = "1994", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Comput. Sci. Corp., NASA Ames Res. Center, Moffett Field, CA, USA", classification = "C6110P (Parallel programming); C6150N (Distributed systems software)", conftitle = "Proceedings Frontiers '95. The Fifth Symposium on the Frontiers of Massively Parallel Computation", corpsource = "Comput. Sci. Corp., NASA Ames Res. Center, Moffett Field, CA, USA", keywords = "codes; Codes; message passing; Message Passing Interface; multidisciplinary applications; Multidisciplinary applications; multizonal applications; Multizonal applications; parallel programming; parallel programs; Parallel programs; performance; Performance; point-to-point message passing routines; Point-to-point message passing routines; portable library; Portable library; single program multiple data stream; Single program multiple data stream; standard; Standard; standards", sponsororg = "IEEE Comput. Soc. Tech. Committee on Comput. Archit.; NASA; Univ. Maryland Inst. Adv. Comput. 
Studies; George Mason Univ", thesaurus = "Message passing; Parallel programming; Standards", treatment = "P Practical", } @Article{Flower:1994:EJM, author = "Jon Flower and Adam Kolawa", title = "{Express} is not just a message passing system: current and future directions in {Express}", journal = j-PARALLEL-COMPUTING, volume = "20", number = "4", pages = "597--614", day = "31", month = apr, year = "1994", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Fri Aug 6 10:14:00 MDT 1999", bibsource = "Compendex database; http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_free/browse/browse.cgi?year=1994&volume=20&issue=4; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_sub/browse/browse.cgi?year=1994&volume=20&issue=4&aid=860", abstract = "We describe some of the features of Express and the way that they were developed as a response to the needs of application programmers. We show how currently emerging computing platforms have led to new application needs and show how these are satisfied with Express features. We introduce a recently developed programming style which greatly simplifies programming as well as directly addressing complex issues such as dynamic load balancing and fault tolerance. 
Finally, we present a comparison of Express' features and motivation to the Message Passing Interface (MPI) standard currently being developed.", acknowledgement = ack-nhfb, affiliation = "ParaSoft Corp", affiliationaddress = "Pasadena, CA, USA", classification = "C5440 (Multiprocessor systems and techniques); C6110P (Parallel programming); C6150N (Distributed systems)", fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", journalabr = "Parallel Comput", keywords = "Application developers toolkit; Application needs; Computer programming; Computing platforms; Dynamic load balancing; Dynamics; Express; Express features; Fault tolerance; Fault tolerance system; Message passing Express; Message passing interface (MPI) standard; Message passing programming style; Message passing system; MPI standard", pubcountry = "Netherlands", thesaurus = "Message passing; Parallel programming; Resource allocation", } @InProceedings{Franke:1994:EIM, author = "H. Franke and P. Hochschild and P. Pattnaik and M. Snir", title = "An Efficient Implementation of {MPI}", crossref = "Decker:1994:PEM", pages = "219--230", year = "1994", bibdate = "Thu Feb 29 17:59:11 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Franke:1994:MEI, author = "H. Franke and P. Hochschild and P. Pattnaik and M. Snir", title = "{MPI-F}: An Efficient Implementation of {MPI} on {IBM-SP1}", crossref = "Agrawal:1994:PIC", pages = "III-197--III-201", year = "1994", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C5220P (Parallel architecture); C5470 (Performance evaluation and testing); C6150N (Distributed systems software)", conftitle = "Proceedings of 23rd Annual International Conference on Parallel Processing", corpsource = "IBM Thomas J. Watson Res. 
Center, Yorktown Heights, NY, USA", keywords = "distributed memory cluster; distributed memory systems; IBM computers; IBM-SP1; message passing; MPI; parallel architectures; performance; performance evaluation", sponsororg = "Pennsylvania State Univ", treatment = "P Practical", } @InProceedings{Franke:1994:MMP, author = "H. Franke and P. Hochschild and P. Pattnaik and J.-P. Prost and M. Snir", title = "{MPI-F}: an {MPI} Prototype Implementation on {IBM SP1}", crossref = "Dongarra:1994:PSW", pages = "43--55", year = "1994", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C6150E (General utility programs); C6150N (Distributed systems software); C6180 (User interfaces)", conftitle = "Proceedings of the Second Workshop on Environments and Tools for Parallel Scientific Computing", corpsource = "IBM Thomas J. Watson Res. Center, Yorktown Heights, NY, USA", keywords = "application program interfaces; distributed memory systems; External User Interface; IBM 9076 Scalable PowerPARALLEL 1 system; IBM computers; IBM SP1; message passing; Message-Passing Interface; modifications; MPI-F; native EUI library; parallel machines; performance measurements; prototype implementation; software libraries; software performance evaluation; software prototyping; user interfaces", treatment = "P Practical", } @Article{Freeman:1994:SMM, author = "T. L. Freeman and J. M. 
Bull", title = "Shared Memory and Message Passing Implementations of Parallel Algorithms for Numerical Integration", journal = j-LECT-NOTES-COMP-SCI, volume = "879", pages = "219--228", year = "1994", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Wed Sep 15 10:01:31 MDT 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs1994.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", keywords = "computing; heterogeneous network; PARA; parallel scientific computing", } @InProceedings{Gajecki:1994:NAT, author = "M. Gajecki and J. Moscinski", title = "A new algorithm for the traveling salesman problem on networked workstations", crossref = "Dongarra:1994:PSC", pages = "229--235", year = "1994", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Inst. of Comput. Sci., Akademia Gorniczo-Hutnicza, Cracow, Poland", classification = "C1160 (Combinatorial mathematics); C1180 (Optimisation techniques); C4240P (Parallel programming and algorithm theory); C5620L (Local area networks); C6150N (Distributed systems software)", keywords = "Efficiency; Local optimization method; Networked workstations; Parallel algorithm; PVM; SUN SPARCstation IPX; Traveling salesman problem", thesaurus = "Local area networks; Parallel algorithms; Travelling salesman problems; Workstations", } @InProceedings{Geist:1994:CCW, author = "G. A. Geist", title = "Cluster computing: the wave of the future?", crossref = "Dongarra:1994:PSC", pages = "236--246", year = "1994", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Oak Ridge Nat. 
Lab., TN, USA", classification = "C0230 (Economic, social and political aspects of computing); C5620L (Local area networks); C6150N (Distributed systems software); C7300 (Natural sciences computing)", keywords = "Cluster computing; Distributed memory computer; Heterogeneous network research project; Oak Ridge National Laboratory; Parallel computers; Parallel Virtual Machine; Portable robust software; PVM; Research issues; Scientific problems; Serial computers; Social issues; Software package; Standard; Tennessee University; User defined computer collection; Vector computers; Workstation clusters", thesaurus = "Distributed memory systems; Local area networks; Natural sciences computing; Social aspects of automation; Software packages; Virtual machines; Workstations", } @Book{Geist:1994:PPV, author = "Al Geist and Adam Beguelin and Jack Dongarra and Weicheng Jiang and Robert Manchek and Vaidyalingam S. Sunderam", title = "{PVM}: Parallel Virtual Machine: a Users' Guide and Tutorial for Networked Parallel Computing", publisher = pub-MIT, address = pub-MIT:adr, pages = "xvii + 279", year = "1994", ISBN = "0-262-57108-0 (paperback)", ISBN-13 = "978-0-262-57108-1 (paperback)", LCCN = "QA76.58 .P85 1994", bibdate = "Thu Feb 29 17:35:15 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", price = "US\$27.50", series = "Scientific and engineering computation", URL = "http://www.mitpress.com/book-home.tcl?isbn=0262571080", acknowledgement = ack-nhfb, keywords = "Computer networks.; Networks --- Parallel programming; Parallel computers.", } @MastersThesis{Grengbondai:1994:CPU, author = "Jules Crephat Grengbondai", title = "Concurrent processing under Parallel Virtual Machine ({PVM})", type = "M.S. 
thesis", school = "Department of Computer Science, Southern Illinois University at Carbondale", address = "Carbondale, IL, USA", pages = "vi + 97", year = "1994", bibdate = "Mon Jan 15 18:16:53 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Gropp:1994:MCL, author = "W. Gropp and E. Lusk", title = "The {MPI} communication library: its design and a portable implementation", crossref = "IEEE:1994:PSP", pages = "160--165", year = "1994", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL, USA", classification = "C5440 (Multiprocessing systems); C5440 (Multiprocessor systems and techniques); C6110P (Parallel programming); C6150N (Distributed systems software); C6150N (Distributed systems)", conftitle = "Proceedings of Scalable Parallel Libraries Conference", corpsource = "Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL, USA", keywords = "advanced features; Advanced features; implementation strategy; Implementation strategy; message passing; MPI communication library; MPI standard; parallel programming; portable implementation; Portable implementation; software portability; standard message-passing interface; Standard message-passing interface; standards", sponsororg = "Mississippi State Univ.; Nat. Sci. Found", thesaurus = "Message passing; Parallel programming; Software portability; Standards", treatment = "P Practical", } @InProceedings{Gropp:1994:SEP, author = "W. Gropp and B. Smith", title = "Scalable, extensible, and portable numerical libraries", crossref = "IEEE:1994:PSP", pages = "87--93", year = "1994", bibdate = "Sun Dec 22 10:18:08 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Div. of Math. and Comput. Sci., Argonne Nat. 
Lab., IL, USA", classification = "C6110B (Software engineering techniques); C6120 (File organisation); C6180 (User interfaces)", keywords = "Aggressive data-structure-neutral implementation; Data structures; Implementation language; Meta-communication layer; Parallel communication technology; PETSc library; Portable Extensible Tools for Scientific computing; Portable numerical libraries; Software portability; Software technology; User interfaces; User-interface language", thesaurus = "Data structures; Software portability; User interfaces", } @Book{Gropp:1994:UMP, author = "William Gropp and Ewing Lusk and Anthony Skjellum", title = "Using {MPI}: Portable Parallel Programming with the Message-Passing Interface", publisher = pub-MIT, address = pub-MIT:adr, pages = "xx + 307", year = "1994", ISBN = "0-262-57104-8", ISBN-13 = "978-0-262-57104-3", LCCN = "QA76.642 G76 1994", bibdate = "Thu Feb 29 17:35:09 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", price = "US\$24.95", series = "Scientific and engineering computation", URL = "http://www.mitpress.com/book-home.tcl?isbn=0262571048", acknowledgement = ack-nhfb, keywords = "Computer interfaces.; Parallel computers --- Programming.; Parallel programming; Parallel programming (Computer science)", } @Article{Gupta:1994:CTE, author = "M. Gupta and P. Banerjee", title = "Compile-time estimation of communication costs of programs", journal = j-J-PROGRAM-LANG, volume = "2", number = "3", pages = "191--225", month = sep, year = "1994", CODEN = "JPLAER", ISSN = "0963-9306", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "IBM Thomas J. Watson Res. 
Center, Yorktown Heights, NY, USA", classification = "C6130 (Data handling techniques); C6150C (Compilers, interpreters and other processors); C6150G (Diagnostic, testing, debugging and evaluating systems)", fjournal = "Journal of Programming Languages", keywords = "Array references; Communication optimizations; Compile-time estimation; Compiler; Data distribution; Data movement; Data partitioning decisions; Distributed memory machines; Fortran programs; Global address space; High-level communication primitives; Loops; Paradigm compiler; Processors; Program analysis; Program communication costs; Ptran-II High-Performance Fortran prototype compiler; Traversal properties", pubcountry = "UK", thesaurus = "Data handling; Distributed memory systems; Optimising compilers; System monitoring", } @InProceedings{Haeuser:1994:RNS, author = "J. Haeuser and M. Spel and J. Muylaert and R. D. Williams", title = "Results for the {Navier--Stokes} Solver {ParNSS} on Workstation Clusters and {IBM SP1} Using {PVM}", crossref = "Wagner:1994:CFD", pages = "432--442", year = "1994", bibdate = "Thu Feb 29 17:59:11 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Hakula:1994:FEM, author = "H. Hakula and J. Malinen and P. Kallberg and P. Valve", title = "The finite element method applied to the exterior {Helmholtz} problem on the {IBM SP-1}", crossref = "Dongarra:1994:PSC", pages = "262--269", year = "1994", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Helsinki Univ. 
of Technol., Espoo, Finland", classification = "A0230 (Function theory, analysis); A0260 (Numerical approximation and analysis); A4110H (Electromagnetic waves: theory); B0290P (Differential equations); B0290T (Finite element analysis); B5210 (Electromagnetic wave propagation); C4170 (Differential equations); C4185 (Finite element analysis); C6110P (Parallel programming); C6150N (Distributed systems software); C7320 (Physics and chemistry computing)", keywords = "2D domains; Complex linear equations; Electromagnetic waves; Exterior Helmholtz problem; Finite element method; Helmholtz equation; IBM SP-1 machine; Irregular meshes; Monitoring facilities; Numerical analysis research; Parallel implementation; Parallel Virtual Machine environment; Performance; Quasi-minimal residual method; Regular meshes; Scattering problem; Single program multiple data model", thesaurus = "Electromagnetic wave scattering; Finite element analysis; Helmholtz equations; IBM computers; Parallel machines; Parallel programming; Physics computing; Software performance evaluation", } @TechReport{Hardwick:1994:PVL, author = "Jonathan C. Hardwick", title = "Porting a vector library: a comparison of {MPI}, {Paris}, {CMMD} and {PVM} (or, ``{I'll} never have to port {CVL} again'')", type = "Research paper", number = "CMU-CS-94-200", institution = inst-SCS-CMU, address = inst-SCS-CMU:adr, pages = "16", year = "1994", bibdate = "Mon Jan 15 15:32:53 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "This paper describes the design and implementation in MPI of the parallel vector library CVL, which is used as the basis for implementing nested data-parallel languages such as NESL and Proteus. 
We compare the ease of writing and debugging the portable MPI implementation of CVL with our experiences writing previous versions in CM-2 Paris, CM-5 CMMD, and PVM, and give initial performance results for MPI CVL running on an IBM SP-1, Intel Paragon, and TMC CM-5.", acknowledgement = ack-nhfb, annote = "An earlier version of this paper appeared in `Proceedings of the 2nd Scalable Parallel Libraries Conference', Mississippi State University, Mississippi, October 1994. November 1994. Supported in part by the Wright Laboratory, Aeronautical Systems Center, Air Force Materiel Command, USAF, and the Advanced Research Projects Agency (ARPA). Supported in part by the Pittsburgh Supercomputing Center. Supported in part by the National Center for Supercomputing Applications. Supported in part by the Argonne National Laboratory.", keywords = "Parallel programming (Computer science)", } @Article{Hellberg:1994:PPP, author = "S. A. Hellberg and E. Zaluska", title = "A portable parallel programming environment based around {PCTE}", journal = j-INFO-SOFTWARE-TECH, volume = "36", number = "7", pages = "419--425", month = jul, year = "1994", CODEN = "ISOTE7", ISSN = "0950-5849 (print), 1873-6025 (electronic)", ISSN-L = "0950-5849", bibdate = "Sun Dec 22 10:18:08 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Electron. and Comput. 
Sci., Southampton Univ., UK", classification = "C6110P (Parallel programming); C6115 (Programming support); C6150N (Distributed systems software)", fjournal = "Information and Software Technology", keywords = "Distributed-memory; End-user applications; High Performance Fortran; High-Performance Computing Community programming standards; Massively-parallel-processor; Message-Passing Interface; PCTE; PCTE-based integrated toolset; Performance; Portable common tool environment; Portable parallel programming environment", pubcountry = "UK", thesaurus = "Message passing; Parallel programming; Programming environments", } @InProceedings{Hempel:1994:MSM, author = "R. Hempel", title = "The {MPI Standard for Message Passing}", crossref = "Gentzsch:1994:HPC", pages = "247--252", year = "1994", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "German Nat. Res. Center for Comput. Sci., St. Augustin, Germany", classification = "C5220P (Parallel architecture); C6110P (Parallel programming)", conftitle = "High-Performance Computing and Networking International Conference. Proceedings, Volume II: Networking and Tools", corpsource = "German Nat. Res. Center for Comput. Sci., St. Augustin, Germany", keywords = "message passing; Message passing; message-passing interfaces; Message-passing interfaces; MPI standard; parallel computing; Parallel computing; parallel programming; standard; Standard; standards", thesaurus = "Message passing; Parallel programming; Standards", treatment = "P Practical", } @Article{Henriksen:1994:PCF, author = "P. Henriksen and R. 
Keunings", title = "Parallel computation of the flow of integral viscoelastic fluids on a heterogeneous network of workstations", journal = j-INT-J-NUMER-METHODS-FLUIDS, volume = "18", number = "12", pages = "1167--1183", month = jun, year = "1994", CODEN = "IJNFDW", ISSN = "0271-2091", ISSN-L = "0271-2091", bibdate = "Sun Dec 22 10:18:08 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Univ. Catholique de Louvain, Belgium", classification = "A4750 (Non-Newtonian dynamics); C4240P (Parallel programming and algorithm theory); C7320 (Physics and Chemistry)", fjournal = "International Journal for Numerical Methods in Fluids", keywords = "Computational mechanics problems; Compute-intensive treatment; Deformation; Dynamic allocation; Fibre suspension flow; Flow; Heterogeneous network of workstations; Integral viscoelastic fluids; Internal variables; Load balancing; Parallel algorithms; Parallel computation; Parallel efficiency; POLYFLOW package; Public domain PVM software library; Static allocation; Viscoplastic solids", pubcountry = "UK", thesaurus = "Flow simulation; Non-Newtonian flow; Parallel algorithms; Physics computing", } @InProceedings{Hiranandani:1994:CTB, author = "S. Hiranandani and K. Kennedy and J. Mellor-Crummey and A. Sethi", title = "Compilation techniques for block-cyclic distributions", crossref = "ACM:1994:CPI", pages = "392--403", year = "1994", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. 
Sci., Rice Univ., Houston, TX, USA", classification = "C6110P (Parallel programming); C6140D (High level languages); C6150C (Compilers, interpreters and other processors)", keywords = "Block-cyclic distributions; Code; Compilers; Data alignment; Data-parallel languages; Fortran D; High-Performance Fortran; Linear-time algorithm; Memory access sequence; MIMD distributed-memory machines; Nonunit strides; Symbolic array dimensions; Symbolic loop bounds", thesaurus = "FORTRAN; Parallel languages; Program compilers", } @InProceedings{Issman:1994:PME, author = "E. Issman and G. Degrez and J. {De Keyser}", title = "A Parallel Multiblock {Euler\slash Navier--Stokes} Solver on a Cluster of Workstations Using {PVM}", crossref = "Gentzsch:1994:HPC", volume = "1", pages = "157--162", year = "1994", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "CFD Group, Von Karman Inst. for Fluid Dynamics, Rhode-St.-Genese, Belgium", classification = "A4710 (General fluid dynamics theory, simulation and other computational methods); C5440 (Multiprocessing systems); C6110P (Parallel programming); C6150J (Operating systems); C6150N (Distributed systems software); C7320 (Physics and chemistry computing)", corpsource = "CFD Group, Von Karman Inst. 
for Fluid Dynamics, Rhode-St.-Genese, Belgium", keywords = "adaptive 2D-multiblock Euler/Navier--Stokes; Adaptive 2D-multiblock Euler/Navier--Stokes solver; automatic load-; Automatic load-balancing; balancing; Block distribution; block distribution; cluster; environment; LOGO software library; Navier--Stokes equations; parallel; Parallel computer; parallel computer; parallel machines; Parallel multiblock Euler/Navier--Stokes solver; parallel multiblock Euler/Navier--Stokes solver; Parallel Virtual Machine communication software; Parallelised; parallelised; physics computing; Processors; processors; programming; PVM; resource allocation; Run-time; run-time; solver; Solver porting; solver porting; Unix; Unix workstation cluster; Unix workstation cluster environment; workstation; Workstation cluster", pubcountry = "Germany", thesaurus = "Navier--Stokes equations; Parallel machines; Parallel programming; Physics computing; Resource allocation; Unix", treatment = "P Practical", } @Article{Iwashita:1994:IPE, author = "S. Iwashita and K. 
Murakami", title = "Implementation and performances evaluation of {KU PVM3\slash AP1000}", journal = j-ENG-SCI-REP-KYUSHU, volume = "16", number = "3", pages = "345--352", month = dec, year = "1994", CODEN = "SRKHEK", ISSN = "0388-1717", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C5440 (Multiprocessing systems); C5470 (Performance evaluation and testing); C6110P (Parallel programming); C6150N (Distributed systems software)", fjournal = "Engineering Sciences Reports, Kyushu University", keywords = "Basic communication intensive benchmarks; KU PVM3/AP1000; Network configurations; Parallel computer; Parallel programming library; Performance evaluation; Virtual workstation cluster; Workstation clusters", language = "Japanese", pubcountry = "Japan", thesaurus = "Parallel machines; Parallel programming; Performance evaluation; Software libraries; Software performance evaluation", } @InProceedings{Joubert:1994:PAL, author = "A. Joubert", title = "Parallel algorithms for linear and nonlinear equations derived from networks", crossref = "Joubert:1994:PCT", pages = "145--152", year = "1994", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "London Parallel Applications Centre, Queen Mary and Westfield Coll., UK", classification = "C4140 (Linear algebra); C4150 (Nonlinear and functional equations); C4240P (Parallel programming and algorithm theory)", keywords = "Linear equations; Load flow; Nonlinear equations; Power systems", thesaurus = "Graph theory; Linear algebra; Network analysis; Nonlinear equations; Parallel algorithms", } @InProceedings{Judd:1994:PIV, author = "D. Judd and N. K. Ratha and P. K. McKinley and J. Weng and A. K. Jain", title = "Parallel implementation of vision algorithms on workstation clusters", crossref = "IEEE:1994:PIF", pages = "317--321 (vol. 
3)", year = "1994", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. Sci., Michigan State Univ., East Lansing, MI, USA", classification = "B6140C (Optical information, image and video signal processing); C1220 (Simulation, modelling and identification); C5260B (Computer vision and image processing techniques); C6110P (Parallel programming)", keywords = "Distributed cluster platforms; Motion parameter estimation algorithm; Sequential CLUSTER program; Square-error data clustering method; Vision algorithms; Workstation clusters", thesaurus = "Computer vision; Parallel algorithms; Parameter estimation", } @Article{Karamcheti:1994:SOM, author = "Vijay Karamcheti and Andrew A. Chien", title = "Software overhead in messaging layers: where does the time go?", journal = j-SIGPLAN, volume = "29", number = "11", pages = "51--60", month = nov, year = "1994", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat May 1 15:50:17 MDT 1999", bibsource = "http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.acm.org:80/pubs/citations/proceedings/asplos/195473/p51-karamcheti/", abstract = "Despite improvements in network interfaces and software messaging layers, software communication overhead still dominates the hardware routing cost in most systems. In this study, we identify the sources of this overhead by analyzing software costs of typical communication protocols built atop the active messages layer on the CM-5. We show that up to 50-70\% of the software messaging costs are a direct consequence of the gap between specific network features such as arbitrary delivery order, finite buffering, and limited fault-handling, and the user communication requirements of in-order delivery, end-to-end flow control, and reliable transmission. 
However, virtually all of these costs can be eliminated if routing networks provide higher-level services such as in-order delivery, end-to-end flow control, and packet-level fault-tolerance. We conclude that significant cost reductions require changing the constraints on messaging layers: we propose designing networks and network interfaces which simplify or replace software for implementing user communication requirements.", acknowledgement = ack-nhfb, classification = "B6150M (Protocols); B6210L (Computer communications); C5440 (Multiprocessing systems); C5610N (Network interfaces); C5640 (Protocols); C6150N (Distributed systems software)", conflocation = "San Jose, CA, USA; 4-7 Oct. 1994", conftitle = "Sixth International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS-VI)", corpsource = "Dept. of Comput. Sci., Illinois Univ., Urbana, IL, USA", fjournal = "ACM SIG{\-}PLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", keywords = "active messages layer; arbitrary delivery order finite buffering; CM-5; communication protocols; cost reductions; cost reductions packet level fault tolerance; design; end-to-end flow control; hardware routing cost; in-order delivery; limited fault handling; measurement; message passing; messaging layers; network features; network interfaces; packet level fault tolerance; parallel machines; performance; protocols; reliable transmission; software communications overhead; software messaging costs; software messaging layers; software overhead; standardization; theory; user communication requirements", sponsororg = "ACM; IEEE Comput. Soc", subject = "{\bf D.4.4} Software, OPERATING SYSTEMS, Communications Management, Message sending. {\bf C.2.2} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Protocols. {\bf C.4} Computer Systems Organization, PERFORMANCE OF SYSTEMS. 
{\bf C.2.1} Computer Systems Organization, COMPUTER-COMMUNICATION NETWORKS, Network Architecture and Design. {\bf C.1.2} Computer Systems Organization, PROCESSOR ARCHITECTURES, Multiple Data Stream Architectures (Multiprocessors), Parallel processors**.", treatment = "P Practical", } @InProceedings{Karrels:1994:PAM, author = "E. Karrels and E. Lusk", title = "Performance Analysis of {MPI} Programs", crossref = "Dongarra:1994:PSW", pages = "195--200", year = "1994", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C6115 (Programming support); C6150E (General utility programs); C6150G (Diagnostic, testing, debugging and evaluating systems); C6150N (Distributed systems software)", conftitle = "Proceedings of the Second Workshop on Environments and Tools for Parallel Scientific Computing", corpsource = "Dept. of Comput. Sci., Wisconsin Univ., Oshkosh, WI, USA", keywords = "application program interfaces; functions library; message passing; Message Passing Interface; MPI programs; parallel computation; parallel programming; performance analysis; portable publicly available implementation; profiling interface; profiling libraries; profiling tools; software libraries; software performance evaluation; specification", treatment = "P Practical", } @InProceedings{Knies:1994:SLL, author = "A. D. Knies and F. R. Barriuso and W. J. Harrod and G. B. 
{Adams, III}", title = "{SLICC}: a low latency interface for collective communications", crossref = "IEEE:1994:PSW", pages = "89--96", year = "1994", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Purdue Univ., West Lafayette, IN, USA", classification = "C5220P (Parallel architecture); C6150N (Distributed systems software)", keywords = "Cray T3D; Directly memory access; Interprocessor communications; Low latency interface; Low-level collective communications interface; Parallel computers; Performance results; PVM; Referenced processing element; Shared address-space library interface; Shared distributed memory systems; SLICC; Software models", thesaurus = "Application program interfaces; Cray computers; Distributed memory systems; Message passing; Shared memory systems; Software libraries; Software performance evaluation", } @InProceedings{Konuru:1994:ULP, author = "R. Konuru and J. Casas and R. Prouty and S. Otto and J. Walpole", title = "A user-level process package for {PVM}", crossref = "Pierce:1994:PSH", pages = "48--55", year = "1994", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C4240P (Parallel programming and algorithm theory); C5440 (Multiprocessor systems and techniques); C6110P (Parallel programming); C6150J (Operating systems); C6150N (Distributed systems)", corpsource = "Dept. of Comput. Sci. and Eng., Oregon Graduate Inst. of Sci. 
and Technol., Beaverton, OR, USA", keywords = "dynamic load balancing; lightweight; message passing; message-based; operating systems (computers); parallel; parallel programming; parallel programs; performance evaluation; processor; programming; PVM; resource allocation; source-code compatible PVM interface; SPMD-style PVM applications; standard PVM; UPVM; user-level process package; virtual processors; virtualization", sponsororg = "IEEE Comput. Soc. Tech. Committee on Supercomput. Appl", treatment = "T Theoretical or Mathematical", } @InProceedings{Konuru:1994:UPP, author = "R. Konuru and J. Casas and R. Prouty and S. Otto and J. Walpole", title = "A User-Level Process Package for {PVM}", crossref = "Pierce:1994:PSH", pages = "48--55", year = "1994", bibdate = "Sun Dec 22 10:18:08 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. Sci. and Eng., Oregon Graduate Inst. of Sci. and Technol., Beaverton, OR, USA", classification = "C4240P (Parallel programming and algorithm theory); C5440 (Multiprocessor systems and techniques); C6110P (Parallel programming); C6150J (Operating systems); C6150N (Distributed systems)", keywords = "Dynamic load balancing; Lightweight virtual processors; Message-based parallel programs; Parallel programming; Processor virtualization; PVM; Source-code compatible PVM interface; SPMD-style PVM applications; Standard PVM; UPVM; User-level process package; Virtual processors", thesaurus = "Message passing; Operating systems [computers]; Parallel programming; Performance evaluation; Resource allocation", xxnote = "Check author order.", } @InProceedings{Kramer-Fuhrmann:1994:TGP, author = "O. Kramer-Fuhrmann and L. Schafers and C. 
Scheidler", title = "{TRAPPER} --- a graphical programming environment for parallel systems", crossref = "Becks:1994:NCT", pages = "3--15", year = "1994", bibdate = "Sun Dec 22 10:18:08 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "German Nat. Res. Center for Comput. Sci., St. Augustin, Germany", classification = "C5440 (Multiprocessing systems); C6115 (Programming support); C6130B (Graphics techniques); C6180G (Graphical user interfaces)", keywords = "Communicating sequential processes; Configtool; Designtool; Graphical programming environment; Graphical representation; Hybrid program development; Machine independent message passing interfaces; Parallel applications; Parallel Macros; Parallel systems; Parallel Virtual Machine; PARMACS; Perftool; Programming model; PVM; Sequential behavior; TRAPPER; Vistool; Visualization", thesaurus = "Communicating sequential processes; Graphical user interfaces; Parallel processing; Programming environments", } @Article{Lazar:1994:SRE, author = "A. A. Lazar and K. H. Tseng and Koon Seng Lim and W. Choe", title = "A scalable and reusable emulator for evaluating the performance of {SS7} networks", journal = j-IEEE-J-SEL-AREAS-COMMUN, volume = "12", number = "3", pages = "395--404", month = apr, year = "1994", CODEN = "ISACEM", DOI = "https://doi.org/10.1109/49.285300", ISSN = "0733-8716 (print), 1558-0008 (electronic)", ISSN-L = "0733-8716", bibdate = "Sun Dec 22 10:18:08 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Electr. 
Eng., Columbia Univ., New York, NY, USA", classification = "B6150C (Switching theory); B6210 (Telecommunication applications); C5620 (Computer networks and techniques); C5670 (Network performance); C7410F (Communications)", fjournal = "IEEE Journal on Selected Areas in Communications", keywords = "ATM LAN; Emulator design; Engineering workstations; Fault conditions; OSI Management Information Service platform; OSIMIS; Parallel Virtual Machine; Performance evaluation; Public domain software; Reusable emulator; Scalable emulator; Singapore; SS7 networks; Unbalanced loading conditions", thesaurus = "Asynchronous transfer mode; Open systems; Performance evaluation; Public domain software; Telecommunication signalling; Telecommunications computing", } @TechReport{Lehman:1994:IZP, author = "Li-wei Lehman", title = "Integrating {Zipcode} and {PVM}: towards a higher-level message-passing environment", type = "Technical report", number = "MSSU-EIRS-ERC 94-2", institution = "Engineering Research Center for Computational Field Simulation, " # inst-MSU, address = inst-MSU:adr, pages = "7", year = "1994", bibdate = "Mon Jan 15 15:32:53 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, keywords = "PVM (Computer program); Telecommunications --- Message processing.; Zipcode (Computer program)", } @InProceedings{Lin:1994:DNC, author = "Mengjou Lin and Jehwei Hsieh and D. H. C. Du and J. P. Thomas and J. A. MacDonald", title = "Distributed network computing over local {ATM} networks", crossref = "IEEE:1994:PSW", pages = "154--163", year = "1994", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. 
Sci., Minnesota Univ., Duluth, MN, USA", classification = "C5620L (Local area networks); C6150E (General utility programs); C6150J (Operating systems); C6150N (Distributed systems software)", keywords = "Application programming interfaces; ASX-100 ATM Switch; Asynchronous transfer mode; BSD socket programming interface; Communication protocol layer; Distributed network computing; Distributed programming; End-to-end communication; Fore Systems ATM API; High-speed network standards; Local ATM networks; Message passing library; Parallel matrix multiplication; Parallel Virtual Machine; Performance characteristics; Remote Procedure Call; Switch-based high-speed local area networks; Workstations", thesaurus = "Application program interfaces; Asynchronous transfer mode; Local area networks; Matrix multiplication; Message passing; Telecommunication standards", } @InProceedings{Loh:1994:ISR, author = "B. C. Loh and G. A. Manson", title = "Incorporating software reuse into the {PCSC} methodology", crossref = "deGloria:1994:TAS", pages = "929--941", year = "1994", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. Sci., Sheffield Univ., UK", classification = "C6110B (Software engineering techniques); C6140D (High level languages)", keywords = "Abstract representations; Occam 2; Parallel Communicating Sequential Code; PCSC methodology; Programming languages; PVM C; Software component reuse; Software reuse", thesaurus = "Occam; Software reusability", } @InProceedings{Lonsdale:1994:CMH, author = "G. Lonsdale and J. Clinckemaillie and S. Vlachoutsis and J. 
Dubois", title = "Crash-simulation migration to {HPC} systems", crossref = "Dekker:1994:MPP", pages = "439--446", year = "1994", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "ESI GmbH, Eschborn, Germany", classification = "C4185 (Finite element analysis); C6110P (Parallel programming); C7440 (Civil and mechanical engineering computing); C7480 (Production engineering computing)", keywords = "Algorithmic parallelization; Automatic parallelization; Automobile crashworthiness simulation program; Automobile testing; Body shell deformation calculation; CAD; CAMAS; Car crash simulation; Computer Aided Migration of Applications System; Digital simulation; Distributed-memory; FAM; Finite element model; High performance computing; MIMD; PAM-CRASH; PAM-STAMP; Parallel programming; Software portability", thesaurus = "Accidents; Automobile industry; Automobiles; CAD/CAM; Computer aided engineering; Digital simulation; Finite element analysis; Mechanical engineering computing; Parallel programming; Product development; Safety; Software portability; Testing", } @InProceedings{Lonsdale:1994:CRP, author = "G. Lonsdale and J. Clinckemaillie and S. Vlachoutsis and J. 
Dubois", title = "Communication requirements in parallel crashworthiness simulation", crossref = "Gentzsch:1994:HPC", pages = "55--61", year = "1994", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "ESI GmbH, Eschborn, Germany", classification = "C4185 (Finite element analysis); C5440 (Multiprocessing systems); C6150N (Distributed systems software); C6185 (Simulation techniques); C7440 (Civil and mechanical engineering computing)", keywords = "Algorithmic features; Communication requirements; Communications strategy design; Communications strategy implementation; Distributed-memory MIMD machines; Global communication; Message-passing; MPI standard; Overhead minimisation; PAM-CRASH industrial crashworthiness simulation program, PAM-CRASH; Parallel crashworthiness simulation; Parallelization approach; PARMACS; Portable message-passing interfaces; PVM", thesaurus = "Application program interfaces; Digital simulation; Distributed memory systems; Finite element analysis; Message passing; Nonlinear dynamical systems; Parallel processing; Structural engineering computing", } @InProceedings{Maffeis:1994:SSD, author = "S. Maffeis", title = "System support for distributed computing", crossref = "Gentzsch:1994:HPC", pages = "293--301", year = "1994", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. 
Sci., Zurich Univ., Switzerland", classification = "C6110J (Object-oriented programming); C6115 (Programming support); C6150N (Distributed systems software)", keywords = "Distributed computing; Distributed failure-resilient applications; Distributed parallel computing; ELECTRA; LINDA systems; Object-groups; Object-oriented communication; Object-oriented programming; PVM; Reliable multicast; Toolkit; Transputer system", thesaurus = "Distributed processing; Object-oriented programming; Software fault tolerance; Software tools", } @InProceedings{Malony:1994:PAP, author = "A. Malony and B. Mohr and P. Beckman and D. Gannon and S. Yang and F. Bodin", title = "Performance analysis of {pC++}: a portable data-parallel programming system for scalable parallel computers", crossref = "Siegal:1994:PEI", pages = "75--84", year = "1994", bibdate = "Sun Dec 22 10:18:08 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. and Inf. Sci., Oregon Univ., Eugene, OR, USA", classification = "C5440 (Multiprocessor systems and techniques); C6110P (Parallel programming); C6120 (File organisation); C6140D (High level languages); C6150G (Diagnostic, testing, debugging and evaluating systems)", keywords = "C++ language extension; Concurrent aggregate collection classes; Distributed data structures; Embar; Fast Poisson solver; Memory hierarchy; NAS suite; Nearest neighbor grid computation; Parallel execution semantics; Parallel machine; PC++; Performance analysis; Performance tools; Portable data-parallel programming system; Scalability measurements; Scalable parallel computers; Sparse codes", thesaurus = "C language; Data structures; Parallel languages; Parallel machines; Parallel programming; Performance evaluation; Program testing; Software portability", } @MastersThesis{Manchek:1994:DIP, author = "Robert J. Manchek", title = "Design and implementation of {PVM} version 3", type = "M.S. 
thesis", school = inst-UTK, address = inst-UTK:adr, pages = "viii + 81", year = "1994", bibdate = "Mon Jan 15 18:16:58 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, keywords = "Computer networks; Computer software.; Parallel computers.", } @InProceedings{Marin:1994:GAL, author = "F. J. Marin and O. Trelles-Salazar and F. Sandoval", title = "Genetic Algorithms on {LAN-Message} Passing Architectures Using {PVM}: Application to the Routing Problem", crossref = "Davidor:1994:PPS", pages = "534--545 (or 534--543??)", year = "1994", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. de Arquitectura y Tecnologia de Computadores y Electronica, Malaga Univ., Spain", classification = "B6150P (Communication network design and planning); B6210L (Computer communications); C1180 (Optimisation techniques); C4240C (Computational complexity); C4240P (Parallel programming and algorithm theory); C5220P (Parallel architecture); C5620L (Local area networks); C6115 (Programming support); C6150N (Distributed systems software); C7410F (Communications computing); C7430 (Computer engineering)", corpsource = "Dept. 
de Arquitectura y Tecnologia de Computadores y
                 Electronica, Malaga Univ., Spain",
  keywords =     "allocation; area networks; combinatorial;
                 Combinatorial complexity; communication; Communication
                 latency; complexity; computational complexity; computer
                 architectures; computing; data communication; data
                 communications; Data communications; data-passing load;
                 Data-passing load; dynamic load balancing; Dynamic load
                 balancing; fault; fault tolerant; Fault tolerant
                 capabilities; genetic algorithm parallelization;
                 Genetic algorithm parallelization; genetic algorithms;
                 independent functions; Independent functions;
                 information; Information redistribution; integrated
                 software; integration tool; LAN-based message passing;
                 LAN-based message passing computer architectures;
                 latency; local; master node; Master node; message
                 passing; optimization problem; Optimization problem;
                 Parallel; parallel algorithms; parallel architectures;
                 Parallel Virtual Machine; partial results reporting;
                 Partial results reporting; public domain software;
                 Public domain software; PVM 3.1; redistribution;
                 resource; routing problem; Routing problem; server
                 processors; Server processors; sockets; Sockets;
                 software; Software integration tool; software tools;
                 telecommunication computing; telecommunication network
                 routing; tolerant capabilities; Virtual Machine;
                 virtual machines",
  pubcountry =   "Germany",
  thesaurus =    "Computational complexity; Data communication; Fault
                 tolerant computing; Genetic algorithms; Integrated
                 software; Local area networks; Message passing;
                 Parallel algorithms; Parallel architectures; Public
                 domain software; Resource allocation; Software tools;
                 Telecommunication computing; Telecommunication network
                 routing; Virtual machines",
  treatment =    "P Practical",
}

@InProceedings{Mattson:1994:PEP,
  author =       "T. G.
Mattson", title = "Programming Environments for Parallel Computing: a Comparison of {CPS}, {Linda}, {P4}, {PVM}, {POSYBL}, and {TCGMSG}", crossref = "Hesham:1994:PTS", volume = "II", pages = "586--594", year = "1994", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Intel Sci. Comput., Beaverton, OR, USA", classification = "C6110P (Parallel programming); C6115 (Programming support)", corpsource = "Intel Sci. Comput., Beaverton, OR, USA", keywords = "Communication times; communication times; Cooperative Processes Software; CPS; Environment utility; environment utility; environments; Ethernet network; four-; Four-node communications tests; Linda; node communications tests; P4; parallel programming; performance evaluation; Portable parallel programming environments; portable parallel programming environments; POSYBL; programming; PVM; Reproducibility; reproducibility; software portability; SPARCstation 1; SPARCstation 1 workstations; TCGMSG; Theoretical Chemistry Group Message-passing system; Two-node communication benchmarks; two-node communication benchmarks; workstations", sponsororg = "IEEE; ACM; Univ. Hawaii; Univ. Hawaii Coll. Bus. 
Admin",
  thesaurus =    "Parallel programming; Performance evaluation;
                 Programming environments; Software portability",
  treatment =    "P Practical; X Experimental",
}

@Article{Matyska:1994:DCS,
  author =       "Lud{\v{e}}k Matyska and Jaroslav Ko{\v{c}}a",
  title =        "{D-CICADA}: a software for conformational {PES}
                 elucidation on network of workstations",
  journal =      j-J-COMPUT-CHEM,
  volume =       "15",
  number =       "9",
  pages =        "937--946",
  month =        sep,
  year =         "1994",
  CODEN =        "JCCHDD",
  DOI =          "https://doi.org/10.1002/jcc.540150904",
  ISSN =         "0192-8651 (print), 1096-987X (electronic)",
  ISSN-L =       "0192-8651",
  bibdate =      "Thu Nov 29 14:54:27 MST 2012",
  bibsource =    "http://www.interscience.wiley.com/jpages/0192-8651;
                 https://www.math.utah.edu/pub/tex/bib/jcomputchem1990.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Inst. of Comput. Sci., Masaryk Univ., Brno, Czech
                 Republic",
  classification = "A3115 (General mathematical and computational
                 developments for atoms and molecules); A3190 (Other
                 topics in the theory of atoms and molecules); A3520B
                 (General molecular conformation and symmetry;
                 stereochemistry); C6110P (Parallel programming); C7320
                 (Physics and chemistry computing)",
  fjournal =     "Journal of Computational Chemistry",
  journal-URL =  "http://onlinelibrary.wiley.com/journal/10.1002/(ISSN)1096-987X",
  keywords =     "CICADA; Conformational potential energy hypersurface;
                 Cyclohexane; D-CICADA software; DEC workstations;
                 Distributed environment; Parallel virtual machine;
                 Parallelization; Polynomial time; Sun workstations;
                 Terminally blocked alanine; Virtual machines",
  onlinedate =   "7 Sep 2004",
  thesaurus =    "Organic compounds; Organic molecule configurations;
                 Parallel programming; Physics computing; Potential
                 energy curves and surfaces of molecules; Virtual
                 machines",
}

@InProceedings{McKenzie:1994:CIM,
  author =       "N. R. McKenzie and K. Bolding and C. Ebeling and L.
Snyder",
  title =        "{CRANIUM}: An Interface for Message Passing on
                 Adaptive Packet Routing Networks",
  crossref =     "Bolding:1994:PCR",
  pages =        "266--280",
  year =         "1994",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "B6150C (Communication switching); B6210L (Computer
                 communications); C4230M (Multiprocessor
                 interconnection); C5220P (Parallel architecture);
                 C5610N (Network interfaces); C5620 (Computer networks
                 and techniques)",
  conftitle =    "Parallel Computer Routing and Communication. First
                 International Workshop, PCRCW '94",
  corpsource =   "Dept. of Comput. Sci. and Eng., Washington Univ.,
                 Seattle, WA, USA",
  keywords =     "adaptive packet routing networks; arbitrary sequence;
                 automatic-receive interface; buffer addresses; Cranium;
                 interconnection network; message passing;
                 multiprocessor interconnection networks; network
                 interface; network interfaces; packet serialization;
                 packet switching; physical node identifiers;
                 processor-initiated interface; processor-network
                 interface; user-level programs",
  treatment =    "P Practical",
}

@Article{McKinney:1994:PGU,
  author =       "G. W. McKinney",
  title =        "A practical guide to using {MCNP} with {PVM}",
  journal =      j-TRANS-AM-NUCL-SOC,
  volume =       "71",
  number =       "????",
  pages =        "397--398",
  month =        "????",
  year =         "1994",
  CODEN =        "TANSAO",
  ISSN =         "0003-018X",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Los Alamos Nat. Lab., NM, USA",
  classification = "A2880F (Radiation monitoring and radiation
                 protection); C6150N (Distributed systems software);
                 C7470 (Nuclear engineering computing)",
  conflocation = "Washington, DC, USA; 13-17 Nov. 1994",
  conftitle =    "1994 Winter Meeting of American Nuclear Society
                 (papers in summary form only received)",
  corpsource =   "Los Alamos Nat.
Lab., NM, USA",
  fjournal =     "Transactions of the American Nuclear Society",
  keywords =     "distributed memory systems; distributed-memory
                 multiprocessing; Distributed-memory multiprocessing;
                 engineering computing; MCNP; Monte Carlo methods;
                 nuclear; PVM; radiation protection",
  thesaurus =    "Distributed memory systems; Monte Carlo methods;
                 Nuclear engineering computing; Radiation protection",
  treatment =    "P Practical",
}

@InProceedings{Miller:1994:PPP,
  author =       "B. P. Miller and J. K. Hollingsworth and M. D.
                 Callaghan",
  title =        "The {Paradyn} parallel performance tools and {PVM}",
  crossref =     "Dongarra:1994:PSW",
  pages =        "201--210",
  year =         "1994",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C6115 (Programming support); C6150G (Diagnostic,
                 testing, debugging and evaluating systems); C6150N
                 (Distributed systems software)",
  corpsource =   "Wisconsin Univ., Madison, WI, USA",
  keywords =     "applications; automated bottleneck searching; dynamic;
                 heterogeneous program measurement; instrumentation;
                 large-scale parallel applications; long-; machines;
                 native PVM; Paradyn; parallel performance tools;
                 parallel programming; performance problem causes;
                 production-sized data sets; program diagnostics;
                 running applications; software metrics; software
                 performance evaluation; software tools; Sun; Thinking
                 Machine CM-5; virtual; workstations",
  treatment =    "P Practical",
}

@InProceedings{Miller:1994:PPT,
  author =       "B. P. Miller and J. K. Hollingsworth and M. D.
                 Callaghan",
  title =        "The {Paradyn} Performance Tools and {PVM}",
  crossref =     "Dongarra:1994:PSW",
  pages =        "201--210",
  year =         "1994",
  bibdate =      "Wed Aug 14 09:02:28 MDT 1996",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}

@MastersThesis{Nemer-Preece:1994:LBH,
  author =       "Nicole Anne Nemer-Preece",
  title =        "Load balancing the heat equation in a heterogeneous
                 environment with {PVM}",
  type =         "M.S.
thesis", school = "University of Missouri, Rolla", address = "Rolla, MO, USA", pages = "viii + 52", year = "1994", bibdate = "Mon Jan 15 18:17:04 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Neun:1994:UPB, author = "W. Neun", title = "Using {PVM} based software for parallel computation in Computer Algebra", crossref = "Calmet:1994:RWC", pages = "46--51", year = "1994", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Konrad-Zuse-Zentrum fur Informationstech. Berlin, Germany", classification = "C4240P (Parallel programming and algorithm theory); C7310 (Mathematics computing)", corpsource = "Konrad-Zuse-Zentrum fur Informationstech. Berlin, Germany", keywords = "computer algebra; Computer algebra; manipulation; mathematics computing; network topology; Network topology; parallel algorithms; Parallel algorithms; parallel computation; Parallel computation; parallel machines; Parallel machines; performance benefit; Performance benefit; PVM based software; symbol", pubcountry = "Germany", sponsororg = "Univ. Karlsruhe", thesaurus = "Mathematics computing; Parallel algorithms; Symbol manipulation", treatment = "P Practical", } @InProceedings{Nguyen:1994:DCE, author = "S. T. Nguyen and B. J. Zook and Xiaodong Zhang", title = "Distributed computation of electromagnetic scattering problems using finite-difference time-domain decompositions", crossref = "IEEE:1994:PTI", pages = "85--93", year = "1994", bibdate = "Sun Dec 22 10:18:08 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Instrum. and Space Res., Southwest Res. 
Inst., San Antonio, TX, USA", classification = "A0260 (Numerical approximation and analysis); A4110H (Electromagnetic waves: theory); B0290P (Differential equations); B5210 (Electromagnetic wave propagation); C4170 (Differential equations); C4240P (Parallel programming and algorithm theory); C7320 (Physics and chemistry computing)", keywords = "Communication pattern variations; Computing performance; Distributed computation; Distributed memory; Distributed workstation network; Electromagnetic scattering problems; Finite-difference time-domain decompositions; Load balancing; Numerical method; Parallelism; Partial differential equations; PVM; Scalability", thesaurus = "Distributed algorithms; Distributed memory systems; Electromagnetic wave scattering; Finite difference time-domain analysis; Partial differential equations; Physics computing", } @InProceedings{Nordling:1994:SOD, author = "P. Nordling and P. Fritzson", title = "Solving ordinary differential equations on parallel computers --- applied to dynamic rolling bearings simulation", crossref = "Dongarra:1994:PSC", pages = "397--415", year = "1994", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. and Inf. 
Sci., Linkoping Univ., Sweden", classification = "C4170 (Differential equations); C6150N (Distributed systems software); C6185 (Simulation techniques); C7440 (Civil and mechanical engineering computing)", keywords = "Dynamic rolling bearings simulation; Ethernet; Fine-grained synchronization; Initial value problems; LSODA; MIMD parallel computers; Ordinary differential equation solution; Parallelism; PARIX operating system; Parsytec GigaCube; PVM; Solaris 2.3; SPARC 10 workstation cluster; Speedup; Sun SPARCcenter 2000", thesaurus = "Differential equations; Digital simulation; Initial value problems; Machine bearings; Mechanical engineering computing; Parallel processing", } @InProceedings{Otto:1994:PVM, author = "S. W. Otto", title = "Processor Virtualization and Migration for {PVM}", crossref = "Dongarra:1994:PSW", pages = "66--75", year = "1994", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C5440 (Multiprocessing systems); C6150N (Distributed systems software); C7430 (Computer engineering)", corpsource = "Dept. of Comput. Sci. and Eng., Oregon Graduate Inst. of Sci. and Technol., Portland, OR, USA", keywords = "context switch; distributed memory systems; distributed scheduling systems; interoperability; local communication speeds; Machine; machines; message passing; Migratable PVM; multi; parallel; Parallel Virtual; performance figures; process granularity; process level MPVM; processor virtualization; programming model; run realistic applications; semantic restrictions; threaded version; times; transparent migration; transparent migration mechanisms; virtual machines; work migration", treatment = "P Practical", } @Article{Phan-Thien:1994:CDL, author = "N. Phan-Thien and D. 
Tullock",
  title =        "Completed double layer boundary element method in
                 elasticity and {Stokes} flow: Distributed computing
                 through {PVM}",
  journal =      j-COMP-MECH,
  volume =       "14",
  number =       "4",
  pages =        "370--383",
  month =        jul,
  year =         "1994",
  CODEN =        "CMMEEE",
  ISSN =         "0178-7675",
  bibdate =      "Sat Apr 06 15:05:19 1996",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}

@Article{Pierce:1994:NMP,
  author =       "P. Pierce",
  title =        "The {NX} message passing interface",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "20",
  number =       "4",
  pages =        "463--480",
  month =        apr,
  year =         "1994",
  CODEN =        "PACOEJ",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C5440 (Multiprocessor systems and techniques);
                 C6110P (Parallel programming); C6115 (Programming
                 support); C6150N (Distributed systems)",
  corpsource =   "Intel Corp., Beaverton, OR, USA",
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
  keywords =     "communication model; design tradeoffs; distributed
                 memory systems; high performance; high performance
                 interface; Intel multicomputers; massively parallel
                 distributed memory supercomputers; message passing;
                 multicomputer message passing; NX interface; NX message
                 passing interface; parallel applications; parallel
                 programming; performance; programming environments;
                 typed send/receive model; usability; vendor-supplied
                 programming interface",
  pubcountry =   "Netherlands",
  treatment =    "P Practical",
}

@InProceedings{Pierce:1994:PIN,
  author =       "P. Pierce and G.
Regnier", title = "The {Paragon} implementation of the {NX} message passing interface", crossref = "Pierce:1994:PSH", pages = "184--190", year = "1994", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C5220P (Parallel architecture); C5440 (Multiprocessor systems and techniques); C6115 (Programming support); C6150J (Operating systems); C6150N (Distributed systems)", conftitle = "Proceedings of IEEE Scalable High Performance Computing Conference", corpsource = "Intel Supercomput. Syst. Div., Beaverton, OR, USA", keywords = "hardware; high performance message passing; message passing; message passing design; message passing interface; NX; operating system; operating systems (computers); OSF/1; Paragon; parallel architectures; parallel machines; parallel supercomputer; performance; programming environment; programming environments", sponsororg = "IEEE Comput. Soc. Tech. Committee on Supercomput. Appl", treatment = "P Practical; T Theoretical or Mathematical", } @InProceedings{Pozo:1994:FTE, author = "R. Pozo and K. Remington", title = "Fast three-dimensional elliptic solvers on distributed network clusters", crossref = "Joubert:1994:PCT", pages = "201--208", year = "1994", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. Sci., Tennessee Univ., Knoxville, TN, USA", classification = "C4130 (Interpolation and function approximation); C4170 (Differential equations); C4240P (Parallel programming and algorithm theory)", keywords = "Distributed network clusters; Elliptic solvers; Object-oriented message passing interface; Parallel architecture; Parallel architectures; Spline collocation", thesaurus = "Distributed algorithms; Elliptic equations; Splines [mathematics]", } @InProceedings{Puthukattukaran:1994:DIP, author = "J. Puthukattukaran and S. Chalasani and P. 
Senapathy",
  title =        "Design and implementation of parallel algorithms for
                 gene-finding",
  crossref =     "IEEE:1994:PTI",
  pages =        "186--193",
  year =         "1994",
  bibdate =      "Sun Dec 22 10:18:08 MST 1996",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Dept. of Electr. and Comput. Eng., Wisconsin Univ.,
                 Madison, WI, USA",
  classification = "C4240P (Parallel programming and algorithm theory);
                 C5440 (Multiprocessing systems); C7330 (Biology and
                 medical computing)",
  keywords =     "CM-5 multicomputer; DNA sequences; Gene-finding; HP
                 Apollo workstations; Human Genome project; Parallel
                 algorithm; Parallel algorithm design; Parallel
                 gene-finding algorithm; Parallel Virtual Machine; PVM;
                 Serial algorithm; Software package",
  thesaurus =    "Biology computing; Cellular biophysics; DNA; Parallel
                 algorithms; Parallel machines",
}

@Article{Reale:1994:PCU,
  author =       "F. Reale and F. Bocchino and S. Sciortino",
  title =        "Parallel computing on {Unix} workstation arrays",
  journal =      j-COMP-PHYS-COMM,
  volume =       "83",
  number =       "2--3",
  pages =        "130--140",
  month =        dec,
  year =         "1994",
  CODEN =        "CPHCBZ",
  ISSN =         "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L =       "0010-4655",
  bibdate =      "Sun Dec 22 10:20:45 MST 1996",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/compphyscomm1990.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Istituto e Osservatorio Astron., Palermo, Italy",
  classification = "A9575P (Mathematical and computer techniques in
                 astronomy); C5620L (Local area networks); C6110P
                 (Parallel programming); C6115 (Programming support);
                 C6150J (Operating systems); C6150N (Distributed systems
                 software); C7350 (Astronomy and astrophysics
                 computing)",
  fjournal =     "Computer Physics Communications",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00104655",
  keywords =     "2D hydrodynamic code; Alpha processors; Astrophysical
                 flows; Data-domain decomposition; DECstations 3000/400;
                 DECstations
5000/200; Dedicated MIMD parallel system; Ethernet LAN; FDDI LAN; Intel i860 processors; Massive parallel computations; Meiko Computing Surface; MIMD systems; Network bandwidth; Nondedicated parallel systems; Parallel computing; Parallelization library; Processor power; PVM software toolset; Software; Unix workstation arrays", pubcountry = "Netherlands", thesaurus = "Astronomy computing; Astrophysical fluid dynamics; Local area networks; Message passing; Parallel programming; Protocols; Software packages; Unix; Workstations", } @InProceedings{Rolfe:1994:PAP, author = "T. J. Rolfe", title = "{PVM}: An Affordable Parallel Processing Environment", crossref = "Anonymous:1994:SCC", pages = "118--125", year = "1994", bibdate = "Thu Feb 29 17:59:11 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Saarinen:1994:EES, author = "S. Saarinen", title = "{EASYPVM} --- An Enhanced Subroutine Library for {PVM}", crossref = "Gentzsch:1994:HPC", volume = "2", pages = "267--272", year = "1994", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Center for Sci. Comput., Espoo, Finland", classification = "C5440 (Multiprocessing systems); C6110B (Software engineering techniques); C6110P (Parallel programming); C6150N (Distributed systems software); C7430 (Computer engineering)", corpsource = "Center for Sci. 
Comput., Espoo, Finland", keywords = "approach; clear message passing programming; Clear message passing programming approach; EASYPVM; Enhanced subroutine library; enhanced subroutine library; global communication; Global communication routines; library; message passing; Message passing calls; message passing calls; Message passing library; parallel machines; parallel programming; Parallel virtual machine; parallel virtual machine; PICL/ParaGraph message tracing postprocessor; Process creation; process creation; PVM; PVM message passing; PVM message passing syntax; routines; software libraries; syntax; virtual machines", pubcountry = "Germany", thesaurus = "Message passing; Parallel machines; Parallel programming; Software libraries; Virtual machines", treatment = "P Practical", }

@InProceedings{Scales:1994:DES,
  author =       "D. J. Scales and M. S. Lam",
  title =        "The design and evaluation of a shared object system for distributed memory machines",
  crossref =     "USENIX:1994:PFU",
  pages =        "101--114",
  year =         "1994",
  bibdate =      "Sun Dec 22 10:20:45 MST 1996",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Comput. Syst. Lab., Stanford Univ., CA, USA",
  classification = "C5440 (Multiprocessing systems); C6120 (File organisation); C6150N (Distributed systems software)",
  keywords =     "Automatic caching; CM-5; Data access; Data prefetch; Distributed memory machines; Global name space; High communication overheads; IBM SP1; Intel iPSC/860; Paragon; Parallel algorithm; Performance; Portable run-time system; Remote processors; SAM; Scientific algorithms; Shared data; Shared object system; Synchronization; System design; System evaluation; Workstations",
  thesaurus =    "Cache storage; Distributed memory systems; Operating systems [computers]; Parallel algorithms; Parallel machines; Shared memory systems; Synchronisation",
}

@Article{Schmidt:1994:EAO, author = "B. K. Schmidt and V. S.
Sunderam", title = "Empirical analysis of overheads in cluster environments", journal = j-CPE, volume = "6", number = "1", pages = "1--32", month = feb, year = "1994", CODEN = "CPEXEI", ISSN = "1040-3108", ISSN-L = "1040-3108", bibdate = "Sun Dec 22 10:18:08 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Math. and Comput. Sci., Emory Univ., Atlanta, GA, USA", classification = "C4230M (Multiprocessor interconnection); C5220P (Parallel architecture); C5470 (Performance evaluation and testing)", fjournal = "Concurrency, practice and experience", keywords = "Cluster environments; Communication delay; Concurrent computing; Heterogeneous processing elements; Load imbalance; Parallelism model; Partitioning strategies; Performance; PVM network computing system; Throughput", pubcountry = "UK", thesaurus = "Multiprocessing systems; Multiprocessor interconnection networks; Performance evaluation", } @InProceedings{Schmidt:1994:IAP, author = "M. Schmidt and R. 
Hanisch", title = "Implementation of an air pollution transport model on parallel hardware", crossref = "Dekker:1994:MPP", pages = "277--284", year = "1994", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "GMD-FIRST, Berlin, Germany", classification = "C5440 (Multiprocessing systems); C6150N (Distributed systems software); C6185 (Simulation techniques); C7320 (Physics and chemistry computing)", keywords = "Air pollution analysis; Air pollution transport model; Berlin; Complex numerical models; Conurbations; MANNA computer; Operational management; Parallel computer; Parallel hardware; Programming interface; PVM; Run time measurements; Simulation environment; Simulation system; Smog situations; Urban planning", thesaurus = "Air pollution; Digital simulation; Flow simulation; Message passing; Parallel machines; Parallel programming; Physics computing; Town and country planning; Transport processes", }

%%% NOTE(review): the author surname below is spelled Schneeman, following
%%% the NIST record for NISTIR 5381 -- confirm.  The citation key keeps its
%%% historical spelling so that existing citations continue to resolve.
%%% The month is taken from the annote field of this entry.
@TechReport{Schneenman:1994:DSS,
  author =       "Richard D. Schneeman",
  title =        "Distributed supercomputing software: experiences with the parallel virtual machine --- {PVM}",
  number =       "NISTIR 5381",
  institution =  "U.S. Dept. of Commerce, National Institute of Standards and Technology",
  address =      "Gaithersburg, MD, USA",
  pages =        "vi + 18",
  month =        mar,
  year =         "1994",
  bibdate =      "Mon Jan 15 15:32:53 MST 1996",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  annote =       "March 1994.",
}

@Article{Schoinas:1994:FGA, author = "Ioannis Schoinas and Babak Falsafi and Alvin R. Lebeck and Steven K. Reinhardt and James R. Larus and David A.
Wood", title = "Fine-grain access control for distributed shared memory", journal = j-SIGPLAN, volume = "29", number = "11", pages = "297--306", month = nov, year = "1994", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat May 1 15:50:17 MDT 1999", bibsource = "http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.acm.org:80/pubs/citations/proceedings/asplos/195473/p297-schoinas/", abstract = "This paper discusses implementations of fine-grain memory access control, which selectively restricts reads and writes to cache-block-sized memory regions. Fine-grain access control forms the basis of efficient cache-coherent shared memory. This paper focuses on low-cost implementations that require little or no additional hardware. These techniques permit efficient implementation of shared memory on a wide range of parallel systems, thereby providing shared-memory codes with a portability previously limited to message passing. This paper categorizes techniques based on where access control is enforced and where access conflicts are handled. We incorporated three techniques that require no additional hardware into Blizzard, a system that supports distributed shared memory on the CM-5. The first adds a software lookup before each shared-memory reference by modifying the program's executable. The second uses the memory's error correcting code (ECC) as cache-block valid bits. The third is a hybrid. The software technique ranged from slightly faster to two times slower than the ECC approach. Blizzard's performance is roughly comparable to a hardware shared-memory machine. 
These results argue that clusters of workstations or personal computers with networks comparable to the CM-5's will be able to support the same shared-memory interfaces as supercomputers.", acknowledgement = ack-nhfb, classification = "C5220P (Parallel architecture); C5320G (Semiconductor storage); C5440 (Multiprocessing systems); C6120 (File organisation)", conflocation = "San Jose, CA, USA; 4-7 Oct. 1994", conftitle = "Sixth International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS-VI)", corpsource = "Dept. of Comput. Sci., Wisconsin Univ., Madison, WI, USA", fjournal = "ACM SIG{\-}PLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", keywords = "access conflicts; Blizzard; block-sized memory regions; cache block valid bits; cache storage; cache-block-sized memory regions; cache-coherent shared memory; CM-5; design; distributed memory systems; distributed shared memory; ECC approach; error correcting code; fine-grain access control; low-cost implementations; measurement; parallel machines; parallel systems; performance; portability; security; shared memory codes; shared memory interfaces; shared memory reference; shared memory systems; software lookup; storage management; supercomputers; theory", sponsororg = "ACM; IEEE Comput. Soc", subject = "{\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles, Shared memory. {\bf D.4.2} Software, OPERATING SYSTEMS, Storage Management, Distributed memories. {\bf D.4.6} Software, OPERATING SYSTEMS, Security and Protection, Access controls.", treatment = "P Practical", } @InProceedings{Seyfarth:1994:GEE, author = "B. R. Seyfarth and J. L. Bickham and M. R. Fernandez", title = "Glenda: an environment for easy parallel programming", crossref = "Pierce:1994:PSH", pages = "637--641", year = "1994", bibdate = "Sun Dec 22 10:18:08 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. 
Sci., Southern Mississippi Univ., Hattiesburg, MS, USA", classification = "C6110P (Parallel programming); C6115 (Programming support); C7320 (Physics and Chemistry); C7430 (Computer engineering)", keywords = "Benchmark; C programming language; Communication functions; Coordination language; Glenda; Global tuple space; Parallel programming environment; Parallel Virtual Machine; Preprocessor; PVM message passing functions; Software package; Tuple server process; Underwater acoustic modeling", thesaurus = "Acoustic analysis; File servers; Message passing; Parallel programming; Physics computing; Programming environments; Underwater sound; Virtual machines", }

@InProceedings{Shee:1994:DMA,
  author =       "Jang Chung Shee and Chao Chin Wu and Lin Wen You and Cheng Chen",
  title =        "Design of a multithread architecture and its parallel simulation and evaluation environment",
  crossref =     "Anonymous:1994:ICS",
  volume =       "1",
  pages =        "69--76",
  year =         "1994",
  bibdate =      "Sun Dec 22 10:19:23 MST 1996",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Inst. of Comput. Sci. and Inf. Eng., Nat. Chiao Tung Univ., Hsinchu, Taiwan",
  classification = "C5220P (Parallel architecture); C6115 (Programming support); C6185 (Simulation techniques)",
  keywords =     "Context switch; Integrated multiprocessing simulation environment; Multithread architecture; Parallel simulation; Parallel simulation and evaluation environment; Parallel Virtual Machine; SUN SPARC workstations; Thread-related instructions",
  thesaurus =    "Digital simulation; Parallel architectures; Programming environments",
}

@InProceedings{Shelton:1994:FPS, author = "W. A. Shelton and G. M. Stocks and F. J. Pinski and R. G. Jordan and Y. Liu and L. Qui and J. B. Staunton and D. D. Johnson and B.
Ginatempo", title = "First principles simulation of materials properties", crossref = "Pierce:1994:PSH", pages = "103--110", year = "1994", bibdate = "Sun Dec 22 10:18:08 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Oak Ridge Nat. Lab., TN, USA", classification = "A3100 (Theory of atoms and molecules); C5440 (Multiprocessor systems and techniques); C6110P (Parallel programming); C6185 (Simulation techniques); C7320 (Physics and Chemistry)", keywords = "Ag-Mg alloy system; Electronic origin; Electronic structure; First principles simulation; High performance workstations; Local computer environment; Materials properties; Order-disorder temperature; Ordered materials; Parallel computer code; Physics computing; PVM3 3; Short-range order intensity; Substitutionally disordered materials; Vector supercomputers", thesaurus = "Digital simulation; Fermi surface; Materials properties; Molecular electronic states; Parallel machines; Parallel programming; Physics; Physics computing", } @InProceedings{Shing:1994:UPC, author = "C.-C. Shing", title = "Use {PVM} on computation of analysis of repeated measurement designs", crossref = "Sall:1994:CIS", pages = "139--142", year = "1994", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C1140Z (Other topics in statistics); C5440 (Multiprocessing systems); C6110P (Parallel programming); C7310 (Mathematics computing)", corpsource = "Dept. of Comput. Sci., Radford Univ., VA, USA", keywords = "computation; concurrent; designed experiments; distributed memory; distributed memory systems; heterogeneous network; parallel computer; parallel programming; parallelized sweep operator; PVM; regression; repeated measurement designs; software package; statistical analysis; sweep operation", sponsororg = "Interface Found. 
North America", treatment = "P Practical", }

@Article{Skjellum:1994:DEZ,
  author =       "A. Skjellum and S. G. Smith and N. E. Doss and A. P. Leung and M. Morari",
  title =        "The design and evolution of {Zipcode}",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "20",
  number =       "4",
  pages =        "565--596",
  day =          "31",
  month =        mar,
  year =         "1994",
  CODEN =        "PACOEJ",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Sun Dec 22 10:18:08 MST 1996",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Dept. of Comput. Sci., Mississippi State Univ., MS, USA",
  classification = "C5440 (Multiprocessor systems and techniques); C6110P (Parallel programming); C6120 (File organisation); C6150N (Distributed systems)",
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
  keywords =     "Collective operations; Communication contexts; Gather-send; Homogeneous computer networks; Large-scale multicomputer software; Mailer data structure; Message passing; MPI standard; Multicomputers; Point-to-point communication; Process-management system; Receive-scatter semantics; Runtime optimizations; Static process groups; Virtual topologies; Zipcode",
  pubcountry =   "Netherlands",
  thesaurus =    "Data structures; Message passing; Multiprocessing systems; Parallel programming",
}

@InProceedings{Skjellum:1994:WLM, author = "A. Skjellum and N. E. Doss and P. V. Bangalore", title = "Writing libraries in {MPI}", crossref = "IEEE:1994:PSP", pages = "166--173", year = "1994", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput.
Sci., Mississippi State Univ., MS, USA", classification = "C4140 (Linear algebra); C6110P (Parallel programming); C6150N (Distributed systems software); C6150N (Distributed systems)", conftitle = "Proceedings of Scalable Parallel Libraries Conference", corpsource = "Dept. of Comput. Sci., Mississippi State Univ., MS, USA", keywords = "cluster; Cluster; code fragments; Code fragments; linear algebra; linear algebra library; Linear algebra library; message passing; Message passing; message-passing systems; Message-passing systems; MPI; multicomputer; Multicomputer; parallel libraries; Parallel libraries; parallel programming; standard; Standard; subroutines; virtual topology; Virtual topology", sponsororg = "Mississippi State Univ.; Nat. Sci. Found", thesaurus = "Linear algebra; Message passing; Parallel programming; Subroutines", treatment = "P Practical", }

%%% NOTE: Sloot:1994:CIO and Sloot:1994:CIP record the same paper (same
%%% authors, title, crossref, and pages).  Both keys are retained so that
%%% existing citations of either one continue to resolve; the volume
%%% number is given in both so that they format consistently.
@InProceedings{Sloot:1994:CIO,
  author =       "P. M. A. Sloot and A. G. Hoekstra and L. O. Hertzberger",
  title =        "A comparison of the {Iserver-Occam}, {Parix}, {Express}, and {PVM} programming environments on a {Parsytec GCel}",
  crossref =     "Gentzsch:1994:HPC",
  volume =       "2",
  pages =        "253--259",
  year =         "1994",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C4240C (Computational complexity); C6110P (Parallel programming); C6115 (Programming support)",
  corpsource =   "Dept. of Comput. Syst., Amsterdam Univ., Netherlands",
  keywords =     "communication capabilities; computational complexity; development time; environments; Express; floating point performance; global communication times; Iserver-Occam; parallel programming; Parix; Parsytec GCel; point to point communication; portability; programmability; programming; PVM; software performance evaluation; time complexity analysis; times",
  pubcountry =   "Germany",
  treatment =    "P Practical",
}

@InProceedings{Sloot:1994:CIP,
  author =       "P. M. A. Sloot and A. G. Hoekstra and L. O. Hertzberger",
  title =        "A Comparison of the {Iserver-Occam}, {Parix}, {Express}, and {PVM} Programming Environments on a {Parsytec GCel}",
  crossref =     "Gentzsch:1994:HPC",
  volume =       "2",
  pages =        "253--259",
  year =         "1994",
  bibdate =      "Sun Dec 22 10:20:45 MST 1996",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Dept. of Comput. Syst., Amsterdam Univ., Netherlands",
  classification = "C4240C (Computational complexity); C6110P (Parallel programming); C6115 (Programming support)",
  keywords =     "Communication capabilities; Development time; Express; Floating point performance; Global communication times; Iserver-Occam; Parallel programming environments; Parix; Parsytec GCel; Point to point communication times; Portability; Programmability; PVM; Time complexity analysis",
  thesaurus =    "Computational complexity; Parallel programming; Programming environments; Software performance evaluation",
}

@InProceedings{Stephens:1994:PBT,
  author =       "R. Stephens",
  title =        "Parallel benchmarks on the {Transtech Paramid} supercomputer",
  crossref =     "deGloria:1994:TAS",
  pages =        "136--146",
  year =         "1994",
  bibdate =      "Sun Dec 22 10:20:45 MST 1996",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C5440 (Multiprocessing systems); C5470 (Performance evaluation and testing)",
  keywords =     "Application fields; Distributed memory parallel supercomputer; GENESIS; Intel i860-XP processors; NAS suites; Parallel benchmarks; PARMACS codes; Portable parallel codes; Portable PVM; Transtech Paramid supercomputer; Workstation clusters",
  thesaurus =    "Distributed memory systems; Parallel processing; Performance evaluation",
}

@Article{Still:1994:PPC, author = "C. H.
Still", title = "Portable parallel computing via the {MPI1} message-passing standard", journal = j-COMPUT-PHYS, volume = "8", number = "5", pages = "533--536, 538--539", month = sep # "--" # oct, year = "1994", CODEN = "CPHYE2", ISSN = "0894-1866 (print), 1558-4208 (electronic)", ISSN-L = "0894-1866", bibdate = "Sun Dec 22 10:18:08 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Lasers and Energy Comput. Div., Lawrence Livermore Nat. Lab., CA, USA", classification = "C6110P (Parallel programming); C6140D (High level languages); C6150N (Distributed systems software)", fjournal = "Computers in Physics", keywords = "C languages binding; C++ bindings; Communicator; Fortran binding; Functionality; Hardware; Message-passing routine library; MPI1 message-passing standard; Portable parallel computing; Receive routine; Send routine; Vendor-independent message-passing library", thesaurus = "C language; FORTRAN; Message passing; Object-oriented languages; Parallel programming", } @Article{Stone:1994:PSO, author = "L. C. Stone and S. B. Shukla and B. 
Neta", title = "Parallel satellite orbit prediction using a workstation cluster", journal = j-COMPUT-MATH-APPL, volume = "28", number = "8", pages = "1--8", month = oct, year = "1994", CODEN = "CMAPDK", ISSN = "0898-1221 (print), 1873-7668 (electronic)", ISSN-L = "0898-1221", bibdate = "Sun Dec 22 10:18:08 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Naval Postgraduate Sch., Monterey, CA, USA", classification = "A9385 (Instrumentation and techniques for geophysical, hydrospheric and lower atmosphere research); A9575P (Mathematical and computer techniques); C5440 (Multiprocessor systems and techniques); C7350 (Astronomy and astrophysics)", fjournal = "Computers and Mathematics with Applications", keywords = "Function decomposition techniques; Parallel computing; Parallel satellite orbit prediction; Parallel Virtual Machine; Performance metric; SUN workstations; Workstation cluster", pubcountry = "UK", thesaurus = "Artificial satellites; Astronomy computing; Parallel processing; Workstations", } @Article{Strok:1994:NJI, author = "Dale C. Strok", title = "In the News: {Jupiter} impacts: Resolution makes a big difference. Supercomputer farming down under. {HPF Forum} welcomes comments. {Smithsonian Awards} honor computational scientists. Low-life computer viruses. {PVM} developers get {R\&D-100} award. The eyes have it. Neural nets detect breast cancer. Better cars through cooperation. Parallel version of global climate model. {Lockheed} to run {Idaho National Engineering Lab}. 
Public-private partners: new drugs, new software", journal = j-IEEE-COMPUT-SCI-ENG, volume = "1", number = "3", pages = "88--90", month = "Fall", year = "1994", CODEN = "ISCEE4", ISSN = "1070-9924 (print), 1558-190X (electronic)", ISSN-L = "1070-9924", bibdate = "Sat May 25 13:29:25 MDT 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "IEEE Computational Science \& Engineering", journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=99", }

@InProceedings{Sunderam:1994:GPP,
  author =       "V. Sunderam",
  title =        "General Purpose Parallel Computing with {PVM}",
  crossref =     "Anonymous:1994:PPC",
  pages =        "185--198",
  year =         "1994",
  bibdate =      "Thu Feb 29 17:59:11 MST 1996",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}

@InProceedings{Sunderam:1994:MSH,
  author =       "V. S. Sunderam",
  title =        "Methodologies and systems for heterogeneous concurrent computing",
  crossref =     "Joubert:1994:PCT",
  pages =        "29--45",
  year =         "1994",
  bibdate =      "Sun Dec 22 10:19:23 MST 1996",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Dept. of Math. and Comput. Sci., Emory Univ., Atlanta, GA, USA",
  classification = "C4240P (Parallel programming and algorithm theory); C6150N (Distributed systems software)",
  keywords =     "Heterogeneous concurrent computing; Parallel algorithm; Parallel processing; Partitioning; Performance aspects; PVM system; Scheduling",
  thesaurus =    "Parallel algorithms; Scheduling",
}

@Article{Sunderam:1994:PCC, author = "V. S. Sunderam and G. A. Geist and J. Dongarra and R.
Manchek", title = "The {PVM} concurrent computing system: Evolution, experiences, and trends", journal = j-PARALLEL-COMPUTING, volume = "20", number = "4", pages = "531--545", day = "31", month = mar, year = "1994", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Fri Aug 6 10:14:00 MDT 1999", bibsource = "http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_free/browse/browse.cgi?year=1994&volume=20&issue=4; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_sub/browse/browse.cgi?year=1994&volume=20&issue=4&aid=861", acknowledgement = ack-nhfb, affiliation = "Dept. of Math. and Comput. Sci., Emory Univ., Atlanta, GA, USA", classification = "B6210L (Computer communications); C5620 (Computer networks and techniques); C6110P (Parallel programming); C6150N (Distributed systems)", corpsource = "Dept. of Math. and Comput. Sci., Emory Univ., Atlanta, GA, USA", fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", keywords = "auxiliary facilities; Auxiliary facilities; case studies; Case studies; climate modeling; Climate modeling; communication overheads; Communication overheads; computer networks; computing model; Computing model; environmental science; Environmental science; experimental enhancements; Experimental enhancements; heterogeneous; heterogeneous concurrent computing; Heterogeneous concurrent computing; Heterogeneous networked computing platforms; interface; large scale scientific supercomputing; Large scale scientific supercomputing; materials science; Materials science; message passing; message passing model; Message passing model; MPP; MPP support; network computing; Network computing; networked computing platforms; networked environments; Networked environments; parallel processing; Parallel processing; parallel programming; process groups; Process groups; programming; Programming 
interface; PVM concurrent computing system; software framework; Software framework; support", pubcountry = "Netherlands", thesaurus = "Computer networks; Message passing; Parallel programming", treatment = "P Practical", }

@InProceedings{Sydow:1994:PSA,
  author =       "A. Sydow",
  title =        "Parallel simulation of air pollution",
  crossref =     "Pehrson:1994:IPP",
  journal =      j-IFIP-TRANS-A,
  volume =       "A-52",
  pages =        "605--612",
  year =         "1994",
  CODEN =        "ITATEC",
  ISSN =         "0926-5473",
  bibdate =      "Sun Dec 22 10:18:08 MST 1996",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "GMD-Res. Inst. for Comput. Archit. and Software Technol., Berlin, Germany",
  classification = "A8670G (Atmosphere); A9260T (Air quality and air pollution); A9365 (Data acquisition, processing and storage); C6110P (Parallel programming); C7340 (Geophysics)",
  fjournal =     "IFIP Transactions. A. Computer Science and Technology",
  keywords =     "Air pollutant transport models; Air pollution modelling; Berlin-Brandenburg area, Germany; CM-5; Equations; Eulerian models; FORGE; Lagrangian models; MANNA; Meteorological models; Model domain decomposition method; Model parallelization; Numerical algorithms; Parallel hardware; Parallel simulation; PARMACS; PVM; Runtime measurements; Software tools; Transputer system; Workstation cluster",
  thesaurus =    "Air pollution; Digital simulation; Environmental science computing; Geophysics computing; Numerical analysis; Parallel processing; Software tools",
}

@InProceedings{Thomas:1994:PSA, author = "S. J. Thomas and J. Cote", title = "Parallel {Semi-Lagrangian} Advection using {PVM}", crossref = "Dekker:1994:MPP", pages = "801--808", year = "1994", bibdate = "Thu Feb 29 17:59:11 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Div.
de Recherche en Prevision Numerique, Environnement Canada, Dorval, Que., Canada", classification = "A0260 (Numerical approximation and analysis); A0340G (Fluid dynamics: general mathematical aspects); A4710 (General fluid dynamics theory, simulation and other computational methods); C4160 (Numerical integration and differentiation); C4240P (Parallel programming and algorithm theory); C5440 (Multiprocessing systems); C6150N (Distributed systems software); C7320 (Physics and chemistry computing)", keywords = "Computational fluid dynamics; Courant Friedrichs Lewy condition; Distributed MIMD implementation; Eulerian methods; Intel iPSC/860; Parallel algorithms; Parallel performance; Parallel semi-Lagrangian advection; Passive advection problem; Processor; PVM; Scalable code; Sub-grid dimensions; Time steps", thesaurus = "Distributed memory systems; Fluid dynamics; Integration; Parallel algorithms; Physics computing; Software performance evaluation; Transport processes", } @InProceedings{Thomsen:1994:RTS, author = "P. G. Thomsen", title = "Real time simulation in a cluster computing environment", crossref = "Dongarra:1994:PSC", pages = "493--497", year = "1994", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Inst. for Math. Modelling, Tech. Univ. 
Denmark, Lyngby, Denmark", classification = "C5620L (Local area networks); C6150J (Operating systems); C6185 (Simulation techniques); C7460 (Aerospace engineering computing); C7810C (Computer-aided instruction)", keywords = "Airplane flying; Cluster computing environment; Differential algebraic equations; FDDI-ring; Mathematical problem; Personnel training; PVM; Real time simulation; Real time update; Ship manoeuvering; Simulator design; Systems variables; Workstation cluster", thesaurus = "Aerospace simulation; Aircraft; Computer based training; Digital simulation; FDDI; Local area networks; Operating systems [computers]; Personnel; Real-time systems; Ships; Workstations", }

@InProceedings{Trefftz:1994:DPE,
  author =       "C. Trefftz and C. C. Huang and P. K. McKinley and T. Y. Li and Z. Zeng",
  title =        "Design and performance evaluation of a distributed eigenvalue solver on a workstation cluster",
  crossref =     "IEEE:1994:IPN",
  pages =        "608--615",
  year =         "1994",
  bibdate =      "Sun Dec 22 10:18:08 MST 1996",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Dept. of Comput. Sci., Michigan State Univ., East Lansing, MI, USA",
  classification = "C4140 (Linear algebra); C4240P (Parallel programming and algorithm theory); C5470 (Performance evaluation and testing); C6110P (Parallel programming)",
  keywords =     "Bisection algorithm; Distributed eigenvalue solver; High-performance workstations; Interprocess communication packages; Laguerre iteration; P4; Parallel algorithm; Parallel scientific computing; Parallel split-merge; Performance evaluation; Performance study; PVM; Rank two splitting; Separation property; Split-merge technique; Standard matrix types; Symmetric tridiagonal matrices; Workstation cluster",
  thesaurus =    "Eigenvalues and eigenfunctions; Parallel algorithms; Parallel programming; Performance evaluation; Workstations",
}

@InProceedings{Trelles-Salazar:1994:MSS, author = "O. Trelles-Salazar and E. L. Zapata and J.-M.
Carazo", title = "Mapping strategies for sequential sequence comparison algorithms on {LAN-based} message passing architectures", crossref = "Gentzsch:1994:HPC", pages = "197--202", year = "1994", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. Archit., Malaga Univ., Spain", classification = "C5220P (Parallel architecture); C5440 (Multiprocessing systems); C5620L (Local area networks); C5630 (Networking equipment); C6110B (Software engineering techniques); C6110P (Parallel programming); C6150N (Distributed systems software); C6160Z (Other DBMS); C7330 (Biology and medical computing)", keywords = "Communication latency; Dynamic load balancing; Fault tolerant capabilities; File server; Guided self scheduling; LAN-based message passing architectures; Mapping strategies; Overall data-passing load; Public-domain PVM 3.1 system; Sequential sequence comparison algorithms; Simple workstation clusters; Socket to socket communications; Software-integration tool", thesaurus = "Biology computing; Computer architecture; File servers; Local area networks; Message passing; Parallel programming; Resource allocation; Scheduling; Sequences; Software fault tolerance; Software portability; Very large databases; Workstations", }

@InProceedings{Uhl:1994:PCC,
  author =       "A. Uhl",
  title =        "Parallel Compact Coding of Satellite Images with Wavelet Packets using {PVM}",
  crossref =     "Kumar:1994:PPI",
  pages =        "382--387",
  year =         "1994",
  bibdate =      "Thu Feb 29 17:59:11 MST 1996",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}

@Article{Varadarajan:1994:FDT, author = "V. Varadarajan and R.
Mittra", title = "Finite-difference time-domain ({FDTD}) analysis using distributed computing", journal = j-IEEE-MICROW-GUIDED-WAVE-LETT, volume = "4", number = "5", pages = "144--145", month = sep # "\slash " # oct, year = "1994", CODEN = "IMGLE3", DOI = "https://doi.org/10.1109/75.289515", ISSN = "1051-8207 (print), 1558-2329 (electronic)", ISSN-L = "1051-8207", bibdate = "Sun Dec 22 10:18:08 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Electromagnetic Commun. Lab., Illinois Univ., Champaign, IL, USA", classification = "B0290Z (Other numerical methods); B5100 (Electric and magnetic fields); B5200 (Electromagnetic waves, antennas and propagation); C4190 (Other numerical methods); C7310 (Mathematics); C7410D (Electronic engineering)", fjournal = "IEEE Microwave and Guided Wave Letters", keywords = "Electromagnetics; FDTD calculations; Finite-difference time-domain analysis; Linear speedup; Parallel distributed computing; Parallel Virtual Machine; PVM 3.2; Three-dimensional rectangular cavity", thesaurus = "Cavity resonators; Distributed processing; Electromagnetic field theory; Electronic engineering computing; Finite difference time-domain analysis; Mathematics computing; Parallel algorithms", } @InProceedings{Vaughan:1994:MPM, author = "P. L. Vaughan and A. Skjellum and D. S. Reese and Fei-Chen Cheng", title = "Migrating from {PVM} to {MPI}. {I}. The {Unify} system", crossref = "IEEE:1994:FSF", pages = "488--495", year = "1994", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "NSF Eng. Res. Center for Computational Field Simulation, Mississippi State Univ., MS, USA", classification = "C6110P (Parallel programming); C6150N (Distributed systems software)", corpsource = "NSF Eng. Res. 
Center for Computational Field Simulation, Mississippi State Univ., MS, USA", keywords = "evolution path; Evolution path; message passing; message Passing Interface; Message Passing Interface; Message passing system; MPI; parallel libraries; Parallel libraries; parallel programming; portability system; Portability system; PVM; software portability; standard notation; Standard notation; system; Unify system", sponsororg = "IEEE Comput. Soc. Tech. Committee on Comput. Archit.; NASA; Univ. Maryland Inst. Adv. Comput. Studies; George Mason Univ", thesaurus = "Message passing; Parallel programming; Software portability", treatment = "P Practical", } @InProceedings{vonHanxleden:1994:VDF, author = "R. von Hanxleden and K. Kennedy and J. Saltz", title = "Value-based distributions in {Fortran D}", crossref = "Gentzsch:1994:HPC", pages = "434--440", year = "1994", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. Sci., Rice Univ., Houston, TX, USA", classification = "C6110P (Parallel programming); C6120 (File organisation); C6140D (High level languages); C6150C (Compilers, interpreters and other processors)", keywords = "Access locality; Access patterns; Data-parallel language; Fortran D; Index-based distributions; Inter-processor locality; Intra-processor locality; Irregular applications; Scalability; Sequential data structures; Value-based distributions", thesaurus = "Data structures; FORTRAN; Parallel languages; Parallelising compilers", } @Article{Walker:1994:DSM, author = "David W. 
Walker",
  title = "The design of a standard message passing interface for distributed memory concurrent computers",
  journal = j-PARALLEL-COMPUTING,
  volume = "20",
  number = "4",
  pages = "657--673",
  day = "31",
  month = mar,
  year = "1994",
  CODEN = "PACOEJ",
  ISSN = "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L = "0167-8191",
  bibdate = "Sat Apr 19 16:34:54 MDT 1997",
  bibsource = "http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_free/browse/browse.cgi?year=1994&volume=20&issue=4; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "See erratum \cite{Walker:1994:EDS}.",
  URL = "http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_sub/browse/browse.cgi?year=1994&volume=20&issue=4&aid=865; http://www.epm.ornl.gov/~walker/mpi/papers/parcomp94.ps.Z",
  acknowledgement = ack-nhfb,
  affiliation = "Math. Sci. Sect., Oak Ridge Nat. Lab., TN, USA",
  classification = "C5220P (Parallel architecture); C5440 (Multiprocessor systems and techniques); C5610N (Network interfaces)",
  corpsource = "Math. Sci. Sect., Oak Ridge Nat. Lab., TN, USA",
  fjournal = "Parallel Computing",
  journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
  keywords = "application topologies; Application topologies; collective communication; communication contexts; Communication contexts; communication routines; Communication routines; distributed memory concurrent computers; Distributed memory concurrent computers; distributed memory systems; message passing; MIMD; MPI; network interfaces; point-to-point communication; process groups; Process groups; standard message passing interface; Standard message passing interface; standards",
  pubcountry = "Netherlands",
  thesaurus = "Distributed memory systems; Message passing; Network interfaces; Standards",
  treatment = "P Practical",
}

@Article{Walker:1994:EDS,
  author = "David W.
Walker", title = "Erratum to: {``The design of a standard message passing interface for distributed memory concurrent computers''}", journal = j-PARALLEL-COMPUTING, volume = "20", number = "8", pages = "1215--1215", month = aug, year = "1994", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Sat Apr 06 15:06:32 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "See \cite{Walker:1994:DSM}.", fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", keywords = "application topologies; collective communication; communication contexts; distributed memory concurrent computers; message passing; point-to-point communication; process groups; standards", } @InProceedings{Wark:1994:PIR, author = "P. Wark and J. Holt", title = "{PVM} Implementation of a Repeated Matching Heuristic For Vehicle Routing", crossref = "Arnold:1994:PCT", pages = "207--216 (or 207--214??)", year = "1994", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Math. and Comput., Univ. of Southern Queensland, Toowoomba, Qld., Australia", classification = "C4240P (Parallel programming and algorithm theory); C6120 (File organisation); C7430 (Computer engineering)", corpsource = "Dept. of Math. and Comput., Univ. 
of Southern Queensland, Toowoomba, Qld., Australia", keywords = "Benchmark problems; benchmark problems; computational complexity; MIMD parallel computer; NP-hard problem; parallel algorithms; Parallel Virtual Machine; PVM implementation; Repeated matching heuristic; repeated matching heuristic; routing; Software package PVM; software package PVM; structure; structures; SUN workstations; tree; tree data; Tree structure; vehicle; Vehicle routing; virtual machines", pubcountry = "Netherlands", thesaurus = "Computational complexity; Parallel algorithms; Tree data structures; Virtual machines", treatment = "A Application; P Practical", } @Article{Welch:1994:PVM, author = "L. R. Welch", title = "A Parallel Virtual Machine for Programs Composed of Abstract Data Types", journal = j-IEEE-TRANS-COMPUT, volume = "43", number = "11", pages = "1249--1261", month = nov, year = "1994", CODEN = "ITCOB4", DOI = "https://doi.org/10.1109/12.324558", ISSN = "0018-9340 (print), 1557-9956 (electronic)", ISSN-L = "0018-9340", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C6110J (Object-oriented programming); C6110P (Parallel programming); C6120 (File organisation); C6150N (Distributed systems)", corpsource = "Dept. of Comput. and Inf. Sci., New Jersey Inst. of Technol., Newark, NJ, USA", fjournal = "IEEE Transactions on Computers", journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=12", keywords = "abstract data types; ADTs; ARC; Asynchronous Remote Procedure Call; automatic parameter restoration; data structures; data synchronization; database management; dynamic load balancing; languages; machines; modularity; multiprocessing programs; parallel programming; parallel virtual machine; programming; remote procedure calls; reuse; software reusability; system development; systems; virtual", treatment = "P Practical", } @InProceedings{White:1994:VVC, author = "R. 
White", title = "{VCMON} --- the {VM\slash ESA Connectivity Monitor}", crossref = "Anonymous:1994:PSE", pages = "783--792", year = "1994", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Velocity Software Inc., Boston, MA, USA", classification = "C6150G (Diagnostic, testing, debugging and evaluating systems); C6150J (Operating systems); C6150N (Distributed systems software)", keywords = "ADSM; AVS; Computer architecture; Connectivity product; LFS; PVM; RSCS; TCP/IP; VCMON; Virtual machine; VM; VM/ESA Connectivity Monitor; VTAM", thesaurus = "Client-server systems; Open systems; Operating systems [computers]; System monitoring; Virtual machines", } @PhdThesis{Wilhelms:1994:DAL, author = "Gerhard Wilhelms", title = "{Dynamische adaptive Lastverteilung f{\"u}r PVM mittels unscharfer Benutzerprofile -- $ \mbox {PVM}^+ $ (English: Dynamic adaptive load distribution for PVM by blurred user profiles -- $ \mbox {PVM}^+ $ ).}", type = "Dissertation", school = "Math.-Naturwiss. Fakult{\"a}t, Universit{\"a}t Augsburg", address = "Augsburg, Germany", pages = "iv + 74", year = "1994", bibdate = "Sat Apr 06 15:01:28 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", } @InProceedings{Yan:1994:PTA, author = "J. C. Yan", title = "Performance tuning with {AIMS} --- an {Automated Instrumentation and Monitoring System} for multicomputers", crossref = "Hesham:1994:PTS", pages = "625--633", year = "1994", bibdate = "Sun Dec 22 10:18:08 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "RECOM Technol., NASA Ames Res. 
Center, Moffett Field, CA, USA",
  classification = "C5470 (Performance evaluation and testing); C6110P (Parallel programming); C6130B (Graphics techniques); C6150G (Diagnostic, testing, debugging and evaluating systems); C6150N (Distributed systems)",
  keywords = "AIMS; Automated Instrumentation and Monitoring System; C programs; Data collection overhead; Event recorders; FORTRAN programs; Multicomputers; Multiprocessors; Parallel program execution; Parallel programming paradigm; Performance data collection; Performance evaluation; Performance tuning; PVM; Resource allocation algorithms; Run-time performance-monitoring library; Scalable multiprocessor; Software toolkit; Source-code instrumentor; Trace post-processor; Trace-file analysis; Trace-file animation",
  thesaurus = "Computer animation; Computerised instrumentation; Computerised monitoring; Data acquisition; Multiprocessing systems; Parallel programming; Performance evaluation; Resource allocation; System monitoring; Tuning",
}

@Article{Yi:1994:PID,
  author = "Sung Yi and K. H. Pierson and M. F. Ahmad",
  title = "Parallel implementation of dynamic simulation to filamentary composite structures with general rate dependent damping",
  journal = j-COMPUT-SYST-ENG,
  volume = "5",
  number = "4--6",
  pages = "469--477",
  month = aug # "--" # dec,
  year = "1994",
  CODEN = "COSEEO",
  ISSN = "0956-0521",
  ISSN-L = "0956-0521",
  bibdate = "Sun Dec 22 10:20:45 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Nat. Center for Supercomput.
Applications, Illinois Univ., Urbana, IL, USA", classification = "C4130 (Interpolation and function approximation); C4185 (Finite element analysis); C6110P (Parallel programming); C7440 (Civil and mechanical engineering computing)", fjournal = "Computing systems in engineering: an international journal", keywords = "CM-5; Conjugate gradient algorithm; Dynamic simulation; Dynamic viscoelastic finite element algorithm; Filamentary composite structures; Generic message passing library; PVM master/slave visco-elastic finite element program; Rate dependent damping; Scalable distributed parallel environment", thesaurus = "Conjugate gradient methods; Damping; Digital simulation; Finite element analysis; Message passing; Parallel programming; Structural engineering computing; Viscoelasticity", } @InProceedings{Zdetsis:1994:PMD, author = "A. D. Zdetsis and R. Biswas", title = "A Parallel Molecular Dynamics Strategy For {PVM}", crossref = "Turchi:1994:SDA", pages = "713--718", year = "1994", bibdate = "Thu Feb 29 17:59:11 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Zemla:1994:WTC, author = "A. Zemla", title = "Wavelet transforms computing on {PVM}", crossref = "Dongarra:1994:PSC", pages = "534--546", year = "1994", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "Parallel computing methods are developed especially in centers that have expensive multiprocessor computers. The advantage of PVM (Parallel Virtual Machine) is that it permits a network of heterogeneous Unix computers to be used as a single large parallel computer. Thus large computational problems can be solved by using the aggregate power of many computers. We present some PVM computational experiments of wavelet transforms in image processing. 
Some PVM experiments were carried out on an IBM PC 486 working under the LINUX system.", acknowledgement = ack-nhfb, affiliation = "Inst. of Math., Polish Acad. of Sci., Warsaw, Poland", classification = "B0230 (Integral transforms); B0290Z (Other numerical methods); B6140C (Optical information, image and video signal processing); C1130 (Integral transforms); C1250 (Pattern recognition); C4190 (Other numerical methods); C5260B (Computer vision and image processing techniques); C6110P (Parallel programming); C6150J (Operating systems); C6150N (Distributed systems software)", corpsource = "Inst. of Math., Polish Acad. of Sci., Warsaw, Poland", keywords = "aggregate power; Aggregate power; IBM PC; image processing; Image processing; large parallel computer; LINUX system; machines; methods; microcomputer applications; multiprocessor computers.; Multiprocessor computers.; network; network of heterogeneous Unix computers; Network of heterogeneous Unix computers; operating systems; Parallel computing; Parallel computing methods; parallel programming; Parallel Virtual Machine; PVM; single; Single large parallel computer; Unix; virtual; wavelet transforms; Wavelet transforms computing", pubcountry = "Germany", sponsororg = "Danish Comput. Centre for Res. and Educ.; Inst. Math. Modelling; Tech. Univ. Denmark", thesaurus = "Image processing; Microcomputer applications; Network operating systems; Parallel programming; Unix; Virtual machines; Wavelet transforms", treatment = "P Practical", xxnote = "NB: special form AT{\&T} required to get correct alpha-style labels.", } @InProceedings{Zielinski:1994:PPS, author = "K. Zielinski and M. Gajecki and G. Czajkowski", title = "Parallel programming systems for {LAN} distributed computing", crossref = "IEEE:1994:IPN", pages = "600--607", year = "1994", bibdate = "Sun Dec 22 10:18:08 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Inst. of Comput. Sci., Univ. of Min. 
and Metall., Cracow, Poland", classification = "B6210L (Computer communications); C5620L (Local area networks); C6110P (Parallel programming); C6140D (High level languages)", keywords = "ANSA; Communication tests; Distributed computing environments; Distributed programming systems construction; Experimental results; LAN distributed computing; Linda; P4; Parallel programming systems; Processor farm model efficiency; PVM; Run time efficiency; SR; Strand", thesaurus = "Local area networks; Parallel languages; Parallel programming; Software packages", } @InProceedings{Zu:1994:OSM, author = "Hong Zu and Ya-Dong Gui and L. M. Ni", title = "Optimal software multicast in wormhole-routed multistage networks", crossref = "IEEE:1994:PSW", pages = "703--712", year = "1994", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Inf. Sci. Inst., Univ. of Southern California, Marina del Rey, CA, USA", classification = "C4230M (Multiprocessor interconnection); C5220P (Parallel architecture); C5440 (Multiprocessing systems); C6110P (Parallel programming)", keywords = "64-Node SP-1; Application level broadcast; Collective communication; IBM SP-1; Interconnection architecture; Meiko CS-2; Multistage interconnection networks; Optimal multicast algorithm; Optimal software multicast; Public domain MPI; Scalable parallel computers; Switching technology; System level multicast service; TMC CM-5; Wormhole routed multistage networks; Wormhole-routed multistage networks", thesaurus = "Multistage interconnection networks; Parallel algorithms; Parallel machines", } @InProceedings{Almeida:1995:CST, author = "F. Almeida and F. Garcia and J. Roda and D. 
Morales and C. Rodriguez",
  title = "A comparative study of two distributed systems: {PVM} and transputers",
  crossref = "Cook:1995:TAS",
  pages = "244--258",
  year = "1995",
  bibdate = "Wed Apr 16 06:39:19 MDT 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C1160 (Combinatorial mathematics); C1180 (Optimisation techniques); C4240P (Parallel programming and algorithm theory); C5220P (Parallel architecture); C5440 (Multiprocessing systems); C6110P (Parallel programming); C6115 (Programming support)",
  corpsource = "Univ. de La Laguna, Tenerife, Spain",
  keywords = "algorithms; branch and bound; distributed system; divide and conquer methods; divide and conquer parallel heapsort algorithm; dynamic programming; environment; Inmos language; integer knapsack problem; LAN; load balancing; operations research; parallel; parallel algorithm; parallel development environment; parallel virtual machine; programming environments; PVM; quicksort algorithm; software; sorting; sorting problem; system; systems; transputer; transputer links; travelling salesman problem; travelling salesman problems",
  pubcountry = "Netherlands",
  sponsororg = "Transputer Consortium; World occam and Transputer User Group; et al",
  treatment = "P Practical",
}

@InProceedings{Aloisio:1995:UPW,
  author = "G. Aloisio and M. A. Bochicchio",
  title = "The use of {PVM} with workstation clusters for distributed {SAR} data processing",
  crossref = "Hertzberger:1995:HPM",
  pages = "570--581",
  year = "1995",
  bibdate = "Wed Apr 16 06:39:19 MDT 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Fac.
di Ingegneria, Lecce Univ., Italy", classification = "B5230 (Electromagnetic compatibility and interference); B6140C (Optical information, image and video signal processing); B6320 (Radar equipment, systems and applications); C1250 (Pattern recognition); C5260B (Computer vision and image processing techniques)", corpsource = "Fac. di Ingegneria, Lecce Univ., Italy", keywords = "active sensor; Active sensor; backscattered echo signals; Backscattered echo signals; cluster of workstations; Cluster of workstations; digital processing; Digital processing; distributed SAR data processing; Distributed SAR data processing; echo; high resolution ground; High resolution ground images; IBM RISC; IBM RISC System 6000/350; image focusing algorithm; Image focusing algorithm; image processing; images; PVM; radar; remote sensing; Remote sensing; synthetic aperture; System 6000/350; workstation clusters; Workstation clusters", pubcountry = "Germany", thesaurus = "Echo; Image processing; Remote sensing; Synthetic aperture radar", treatment = "A Application; P Practical", } @InProceedings{Alves:1995:WPC, author = "A. Alves and L. Silva and J. Carreira and J. G. Silva", title = "{WPVM}: parallel computing for the people", crossref = "Hertzberger:1995:HPM", pages = "582--587", year = "1995", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. de Engenharia Inf., Coimbra Univ., Portugal", classification = "C5440 (Multiprocessing systems); C6150J (Operating systems); C6180 (User interfaces); C7430 (Computer engineering)", keywords = "Microsoft Windows Operating System; MS Windows; Parallel machine; PC LANs; PVM implementation; Windows Parallel Virtual Machine", thesaurus = "Operating systems [computers]; Parallel machines; User interfaces; Virtual machines", } @InProceedings{Ancona:1995:PAD, author = "M. Ancona and M. 
{De Benedetto}",
  title = "A parallel algorithm for `document segmentation'",
  crossref = "IEEE:1995:PEW",
  pages = "516--521",
  year = "1995",
  bibdate = "Sun Dec 22 10:20:45 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Dipartimento di Inf. e Scienza dell'Inf., Genoa Univ., Italy",
  classification = "C4240P (Parallel programming and algorithm theory); C5260B (Computer vision and image processing techniques); C6130D (Document processing techniques)",
  keywords = "Data parallel approach; Divide and conquer implementation; Document segmentation; Parallel algorithm; PVM3 system; Xy-tree; Xy-trees",
  thesaurus = "Divide and conquer methods; Document image processing; Image segmentation; Parallel algorithms; Tree data structures",
}

@Article{Anonymous:1995:BRPb,
  author = "Anonymous",
  title = "Book Review: {{\booktitle{PVM: Parallel virtual machine: a users' guide and tutorial for networked parallel computing}}: By Al Geist, Adam Beguelin, Jack Dongarra, Weicheng Jiang, Robert Manchek and Vaidy Sunderam. MIT Press, Cambridge, MA. (1994). 279 pages. \$19.95}",
  journal = j-COMPUT-MATH-APPL,
  volume = "30",
  number = "9",
  pages = "122--122",
  month = nov,
  year = "1995",
  CODEN = "CMAPDK",
  ISSN = "0898-1221 (print), 1873-7668 (electronic)",
  ISSN-L = "0898-1221",
  bibdate = "Wed Mar 1 21:48:22 MST 2017",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/computmathappl1990.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://www.sciencedirect.com/science/article/pii/0898122195901973",
  acknowledgement = ack-nhfb,
  fjournal = "Computers and Mathematics with Applications",
  journal-URL = "http://www.sciencedirect.com/science/journal/08981221",
}

@Article{Anonymous:1995:BRU,
  author = "Anonymous",
  title = "Book Review: {{\booktitle{Using MPI: Portable parallel programming with the message-passing interface}}: By William Gropp, Ewing Lusk and Anthony Skjellum. MIT Press, Cambridge, MA. (1994). 307 pages.
\$24.95}", journal = j-COMPUT-MATH-APPL, volume = "30", number = "9", pages = "122--122", month = nov, year = "1995", CODEN = "CMAPDK", ISSN = "0898-1221 (print), 1873-7668 (electronic)", ISSN-L = "0898-1221", bibdate = "Wed Mar 1 21:48:22 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/computmathappl1990.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/089812219590199X", acknowledgement = ack-nhfb, fjournal = "Computers and Mathematics with Applications", journal-URL = "http://www.sciencedirect.com/science/journal/08981221", } @InProceedings{Anonymous:1995:UPH, author = "Anonymous", title = "Using {PVM} to Host {CLIPS} in Distributed Environments", crossref = "Anonymous:1995:CCS", pages = "203--211", year = "1995", bibdate = "Thu Feb 29 17:59:11 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Appiani:1995:PSI, author = "E. Appiani and M. Bologna and M. Corvi and M. Iardella", title = "{PVM} in a shared-memory industrial multiprocessor", crossref = "Hertzberger:1995:HPM", pages = "588--593", year = "1995", bibdate = "Thu Feb 29 17:59:11 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Res. and Dev. Services, Elsag Bailey, Genova, Italy", classification = "C5220P (Parallel architecture); C5440 (Multiprocessing systems); C7430 (Computer engineering)", keywords = "EMMA2E; ESPRIT project; Message-passing environment; Performance; Portable parallel applications; PVM; Shared-memory environment; Shared-memory industrial multiprocessor", thesaurus = "Parallel processing; Shared memory systems; Virtual machines", } @InProceedings{Appiani:1995:PSM, author = "E. Appiani and M. Bologna and M. Corvi and M. 
Iardella",
  title = "{PVM} in a shared-memory industrial multiprocessor",
  crossref = "Hertzberger:1995:HPM",
  pages = "588--593",
  year = "1995",
  bibdate = "Wed Apr 16 06:39:19 MDT 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C5440 (Multiprocessing systems); C5220P (Parallel architecture); C7430 (Computer engineering)",
  corpsource = "Res. and Dev. Services, Elsag Bailey, Genova, Italy",
  keywords = "EMMA2E; ESPRIT project; message-; parallel applications; parallel processing; passing environment; performance; portable; PVM; shared memory systems; shared-memory environment; shared-memory industrial multiprocessor; virtual machines",
  pubcountry = "Germany",
  treatment = "A Application; P Practical",
  xxnote = "Possible duplicate of entry Appiani:1995:PSI (same authors, title, crossref, and pages); kept so that existing citations of this key continue to resolve.",
}

@InProceedings{Arioli:1995:PSB,
  author = "M. Arioli and A. Drummond and I. S. Duff and D. Ruiz",
  title = "A parallel scheduler for block iterative solvers in heterogeneous computing environments",
  crossref = "Bailey:1995:PSS",
  pages = "460--465",
  year = "1995",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Istituto di Analisi Numerica, CNR, Pavia, Italy",
  classification = "B0290F (Interpolation and function approximation); C4130 (Interpolation and function approximation); C4240P (Parallel programming and algorithm theory); C5440 (Multiprocessing systems)",
  keywords = "Block iterative solvers; Cimmino method; Communication networks; Heterogeneous computing environments; Heterogeneous processors; Parallel scheduler; PVM 3",
  thesaurus = "Iterative methods; Parallel algorithms; Parallel machines; Scheduling; Telecommunication networks",
}

@InProceedings{Arnow:1995:DLB,
  author = "D. M.
Arnow", title = "{DP}: a library for building portable, reliable distributed applications", crossref = "USENIX:1995:PUT", pages = "235--247", year = "1995", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. and Inf. Sci., Brooklyn Coll., NY, USA", classification = "C6110P (Parallel programming); C6115 (Programming support)", keywords = "Asynchronous delivery; Communication tool; Disjunctive programming; Distributed processing; Distributed programming; DP; Failure tolerance; Integer goal programming code; Interrupt generating message; Library; Message operation; Message operations; Monte Carlo; Portable software building; Process creation; Process management; Reliable distributed application; Software package; Software portability; Software support", thesaurus = "Application generators; Authoring systems; Distributed processing; Software fault tolerance; Software libraries; Software packages; Software portability", } @InProceedings{Asenjo:1995:SLF, author = "R. Asenjo and E. L. Zapata", title = "Sparse {LU} factorization of the {Cray T3D}", crossref = "Hertzberger:1995:HPM", pages = "690--696", year = "1995", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. 
Archit., Malaga Univ., Spain", classification = "C4140 (Linear algebra); C4240P (Parallel programming and algorithm theory); C6110P (Parallel programming); C6150N (Distributed systems software)", keywords = "Compressed row storage; Cray T3D; Cyclic distribution; Distributed memory machines; Dynamic data movement; Fill-in; Local storage schemes; Overall efficiency; Parallel algorithm; Processor mesh; PVM message passing interface; Semi-ordered linked list; Sparse codes; Sparse LU factorization; Sparse matrices; SPMD programming model; Two-dimensional linked list", thesaurus = "Cray computers; Distributed memory systems; Message passing; Parallel algorithms; Parallel programming; Sparse matrices", } @InProceedings{Ashby:1995:PPG, author = "S. F. Ashby and R. D. Falgout and S. G. Smith and A. F. B. Tompson", title = "The parallel performance of a groundwater flow code on the {Cray T3D}", crossref = "Bailey:1995:PSS", pages = "131--136", year = "1995", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Center for Comput. Sci. and Eng., Lawrence Livermore Nat. Lab., CA, USA", classification = "A0260 (Numerical approximation and analysis); A0270 (Computational techniques); A4755M (Flow through porous media); A9240K (Groundwater); C4240P (Parallel programming and algorithm theory); C6110P (Parallel programming); C7340 (Geophysics computing)", keywords = "Computational kernels; Cray T3D; Distributed memory MIMD machines; Groundwater flow code; Parallel performance; PVM message-passing library; Three-dimensional heterogeneous porous media", thesaurus = "Flow through porous media; Geophysics computing; Groundwater; Message passing; Numerical analysis; Parallel programming", } @InProceedings{Ayguade:1995:DUA, author = "E. Ayguade and J. Garcia and M. Girones and J. Labarta and J. Torres and M. 
Valero", title = "Detecting and using affinity in an automatic data distribution tool", crossref = "Pingali:1995:LCP", pages = "61--75", year = "1995", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. d'Arquitectura de Computadors, Univ. Politecnica de Catalunya, Barcelona, Spain", classification = "C6110P (Parallel programming); C6115 (Programming support); C6150N (Distributed systems software)", keywords = "Affinity; Alignment; Alignment functions; Alignment preferences; Arrays; Automatic data distribution tool; Data Distribution Tool; Fortran77; Loop reference patterns; Perfect Club benchmarks; Programs; Reference pattern analysis; SPEC benchmarks; Static functions; Tool phases", thesaurus = "Parallel programming; Software tools", } @InProceedings{Bakhtiari:1995:APL, author = "S. Bakhtiari and R. Safavi-Naini", title = "Application of {PVM} to linear cryptanalysis", crossref = "Gray:1995:PCT", pages = "278--279", year = "1995", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Wollongong Univ., NSW, Australia", classification = "C4240P (Parallel programming and algorithm theory); C6130S (Data security)", corpsource = "Wollongong Univ., NSW, Australia", keywords = "attack; Attack; block cipher algorithms; Block cipher algorithms; cryptography; Data; Data Encryption Standard; Encryption Standard; linear cryptanalysis; Linear cryptanalysis; parallel algorithms; PVM; virtual machines", pubcountry = "Netherlands", thesaurus = "Cryptography; Parallel algorithms; Virtual machines", treatment = "T Theoretical or Mathematical", } @InProceedings{Barbour:1995:PIG, author = "A. E. Barbour and M. F. 
Gabre", title = "Parallel Implementation of {Gauss--Seidel} and Conjugate Gradient For Solving System of Linear Equations {$ A x = b $} Using {PVM}", crossref = "Aityan:1995:PFI", pages = "33--36", year = "1995", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Math. and Comput. Sci., Georgia Southern Univ., Statesboro, GA, USA", classification = "B0290F (Interpolation and function approximation); B0290H (Linear algebra); C4130 (Interpolation and function approximation); C4140 (Linear algebra); C4240P (Parallel programming and algorithm theory); C5440 (Multiprocessing systems)", corpsource = "Dept. of Math. and Comput. Sci., Georgia Southern Univ., Statesboro, GA, USA", keywords = "algorithms; conjugate gradient methods; Conjugate gradient methods; definite band matrix; equations; Gauss--Seidel method; iterations; Iterations; linear; Linear equations; matrix algebra; parallel; parallel implementation; Parallel implementation; parallel machines; positive; Positive definite band matrix; PVM; solution vector; Solution vector; systematic behavior; Systematic behavior", thesaurus = "Conjugate gradient methods; Matrix algebra; Parallel algorithms; Parallel machines", treatment = "A Application; P Practical", } @Article{Beaumont:1995:DPG, author = "P. M. Beaumont and P. T. Bradshaw", title = "A distributed parallel genetic algorithm for solving optimal growth models", journal = j-COMP-ECONOMICS, volume = "8", number = "3", pages = "159--179", month = aug, year = "1995", CODEN = "CNOMEL", ISSN = "0927-7099", ISSN-L = "0927-7099", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. 
of Econ., Florida State Univ., Tallahassee, FL, USA", classification = "C1180 (Optimisation techniques); C1290D (Systems theory applications in economics and business); C4130 (Interpolation and function approximation); C4150 (Nonlinear and functional equations); C4180 (Integral equations); C6150N (Distributed systems software); C7120 (Financial computing); C7310 (Mathematics computing)", fjournal = "Computational Economics", keywords = "Agent discounted utility maximization; Chebyshev polynomial series expansion; Competing nodes; Distributed parallel genetic algorithm; Economic growth; Exact Euler equation; Finite horizon; First-order conditions; Function topology; Generalization; Infinite horizon; Multiple state problems; Nonlinear integral equation; Optimal function fitting; Parameter space searching; PVM; Single-state deterministic optimal growth model; State variable range; State-space searching; Taylor-Uhlig problem; Workstation cluster", pubcountry = "Netherlands", thesaurus = "Chebyshev approximation; Distributed algorithms; Economic cybernetics; Financial data processing; Genetic algorithms; Integral equations; Mathematics computing; Nonlinear equations; Polynomials; State-space methods", } @Article{Beguelin:1995:REP, author = "Adam Beguelin and Jack Dongarra and Al Geist and Robert Manchek and Vaidy Sunderam", title = "Recent Enhancements to {PVM}", journal = j-IJSAHPC, volume = "9", number = "2", pages = "108--127", month = "Summer", year = "1995", CODEN = "IJSCFG", ISSN = "1078-3482", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; UnCover library database", abstract = "This paper presents new features of PVM, a popular standard for writing parallel programs that execute over networks of heterogeneous machines. Although PVM has become an important infrastructure for parallel programmers, we continue to develop the system based both on user feedback and our own research interests. 
In this paper we present new communications routines and briefly characterize their performance. We describe new extensible services that allow advanced users to customize certain aspects of the default PVM functionality. An overview of shared-memory PVM optimizations is presented. PVM's new tracing facility and a graphical console that utilizes this capability are described. Finally, we discuss future extensions to PVM now under investigation.", acknowledgement = ack-nhfb, affiliation = "Carnegie Mellon Univ", affiliationaddress = "Pittsburgh, PA, USA", classification = "722.1; 722.2; 722.4; 723.1; 723.1.1; 921.5; C5440 (Multiprocessing systems); C6110P (Parallel programming); C6115 (Programming support); C6150N (Distributed systems software)", corpsource = "Sch. of Comput. Sci., Carnegie Mellon Univ., Pittsburgh, PA, USA", fjournal = "International Journal of Supercomputer Applications and High Performance Computing", journalabr = "Int J Supercomput Appl High Perform Comput", keywords = "Advanced users; advanced users; communications; Communications routines; Computer architecture; Computer networks; Computer programming languages; Computer software; console; Data communication systems; Data storage equipment; evaluation; Extensible services; extensible services; graphical; Graphical console; Heterogeneous machines; heterogeneous machines; Message passing; Optimization; parallel machines; Parallel processing systems; Parallel programmers; parallel programmers; parallel programming; Parallel programs; parallel programs; Parallel virtual machine; Parallel Virtual Machine; Performance; performance; PVM; routines; Shared memory; shared memory; Shared-memory PVM optimizations; shared-memory PVM optimizations; software libraries; software performance; software standards; Standard; standard; systems; Tracing facility; tracing facility; User feedback; user feedback; virtual machines", thesaurus = "Parallel machines; Parallel programming; Shared memory systems; Software 
libraries; Software performance evaluation; Software standards; Virtual machines", treatment = "A Application; P Practical", } @InProceedings{Bendrider:1995:SME, author = "M. Bendrider and J.-M. Leclercq", title = "Second-Order {M{\o}ller--Plesset} and {Epstein--Nesbet} Corrections to the Molecular Charge Density: Distributed Computing on a Cluster of Heterogeneous Workstations with the {PVM} System", crossref = "Bernardi:1995:CCE", pages = "73--??", year = "1995", bibdate = "Thu Feb 29 17:59:11 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Benkner:1995:VFA, author = "S. Benkner", title = "{Vienna Fortran 90} --- an advanced data parallel language", crossref = "Malyshkin:1995:PCT", pages = "142--156", year = "1995", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Inst. for Software Technol., Wien Univ., Austria", classification = "C6110P (Parallel programming); C6120 (File organisation); C6140D (High level languages)", keywords = "Advanced data parallel language; Data distribution; Distributed memory parallel computers; Explicit user control; Pointer objects; Shared memory programming paradigm; User defined data structures; Vienna Fortran 90", thesaurus = "FORTRAN; Message passing; Parallel languages; Shared memory systems; Storage management", } @Article{Berendsen:1995:GMP, author = "H. J. C. Berendsen and D. van der Spoel and R. 
van Drunen", title = "{GROMACS}: a message-passing parallel molecular dynamics implementation", journal = j-COMP-PHYS-COMM, volume = "91", number = "1--3", pages = "43--56", month = sep, year = "1995", CODEN = "CPHCBZ", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm1990.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Bioson Res. Inst., Groningen Univ., Netherlands", classification = "A3115 (General mathematical and computational developments for atoms and molecules); A3420 (Interatomic and intermolecular potentials and forces); A3425 (Intramolecular energy transfer; intramolecular dynamics; dynamics of van der Waals molecules); A3520D (Interatomic distances and angles in molecules); A6120J (Computer simulation of static and dynamic liquid behaviour); A8710 (General, theoretical, and mathematical biophysics); A8715 (Molecular biophysics); C6110P (Parallel programming); C7320 (Physics and chemistry computing); C7330 (Biology and medical computing)", fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", keywords = "Analysis tools; Aqueous environment; Biomacromolecules; Buckingham potentials; Charge groups; Conversion programs; Cosine power series interactions; Coulomb potentials; Custom-designed 32-processor ring GROMACS; Dihedral angles; Energy minimization program; Fixed bonded interactions; GROMACS software; GROningen MAchine for Chemical Simulation; Interprocessor communication; Lennard-Jones potentials; Message-passing parallel molecular dynamics implementation; Molecular dynamics program; Parallel message-passing implementation; Parallel system; Particle decomposition; Pressure scaling; Rectangular periodic boundary conditions; Temperature scaling; Variable nonbonded pair interactions", pubcountry = "Netherlands", 
thesaurus = "Biology computing; Bond angles; Chemistry computing; Digital simulation; Electric potential; Lennard-Jones potential; Molecular biophysics; Molecular dynamics method; Parallel programming", } @Article{Bernaschi:1995:DRP, author = "Massimo Bernaschi and Giorgio Richelli", title = "Development and results of {PVMe} on the {IBM 9076 SP1}", journal = j-J-PAR-DIST-COMP, volume = "29", number = "1", pages = "75--83", day = "15", month = aug, year = "1995", CODEN = "JPDCER", DOI = "https://doi.org/10.1006/jpdc.1995.1107", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Thu Mar 9 09:18:58 MST 2000", bibsource = "http://www.idealibrary.com/servlet/useragent?func=showAllIssues&curIssueID=jpdc; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.idealibrary.com/links/doi/10.1006/jpdc.1995.1107/production; http://www.idealibrary.com/links/doi/10.1006/jpdc.1995.1107/production/pdf", acknowledgement = ack-nhfb, classification = "C5220P (Parallel architecture); C5440 (Multiprocessing systems); C6115 (Programming support)", corpsource = "IBM Eur. Center for Sci. and Eng. Comput., Rome, Italy", fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", keywords = "IBM 9076 SP1; IBM's AIX implementation; message passing; parallel machines; PARMACS; passing programming model; programming environments; PVM message; PVMe", treatment = "A Application; P Practical", } @InProceedings{Bernaschi:1995:PEI, author = "M. Bernaschi and G. Richelli", title = "{PVMe}: an enhanced implementation of {PVM} for the {IBM 9076 SP2}", crossref = "Hertzberger:1995:HPM", pages = "461--471", year = "1995", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "IBM Eur. Center for Sci. and Eng. 
Comput., Rome, Italy", classification = "C6110P (Parallel programming); C6150N (Distributed systems software)", corpsource = "IBM Eur. Center for Sci. and Eng. Comput., Rome, Italy", keywords = "IBM 9076 SP2; IBM's AIX implementation; message; message passing; Message passing programming model; parallel programming; passing programming model; PVMe", pubcountry = "Germany", thesaurus = "Message passing; Parallel programming", treatment = "P Practical", } @InProceedings{Bickham:1995:POM, author = "J. L. Bickham", title = "Parallel ocean modeling using {Glenda}", crossref = "ACM:1995:PAS", pages = "58--63", year = "1995", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Univ. of Southern Mississippi, Hattiesburg, MS, USA", classification = "C6110P (Parallel programming); C6150G (Diagnostic, testing, debugging and evaluating systems); C7340 (Geophysics computing)", keywords = "Array dependency; Debugging methods; Glenda; Ocean dynamics; Parallel ocean modeling; Parallel version; Parallelization process; PVM; SWEM", thesaurus = "Geophysics computing; Oceanographic techniques; Parallel programming; Program debugging", } @InProceedings{Bischof:1995:CSM, author = "C. Bischof and S. Huss-Lederman and Xiaobai Sun and A. Tsao and T. Turnbull", title = "A Case Study of {MPI}: Portable and Efficient Libraries", crossref = "Bailey:1995:PSS", pages = "728--733", year = "1995", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL, USA", classification = "C6110B (Software engineering techniques); C6110P (Parallel programming); C6150N (Distributed systems software)", conftitle = "Proceedings of the Seventh SIAM Conference on Parallel Processing for Scientific Computing", corpsource = "Div. of Math. and Comput. Sci., Argonne Nat. 
Lab., IL, USA", keywords = "1 dimensional broadcast; 1 Dimensional broadcast; ANL/MS MPI implementation; Argonne National Laboratory/Mississippi State; broadcasting; case study; Case study; CM5; Delta; efficient libraries; Efficient libraries; Intel Delta; message passing; Message Passing Interface standard; MPI; MPI based implementations; MPI broadcast collective operation; native NX message passing systems; Native NX message passing systems; optimized versions; Optimized versions; Paragon; parallel programming; parallel programming system; Parallel programming system; portable public domain version; Portable public domain version; software libraries; software portability; software standards; SP1", thesaurus = "Broadcasting; Message passing; Parallel programming; Software libraries; Software portability; Software standards", treatment = "P Practical", } @InProceedings{Bjorge:1995:ISS, author = "D. Bjorge", title = "Implementation of the semi-implicit scheme in a message passing version of {HIRLAM} (weather forecasting)", crossref = "Hoffmann:1995:CAP", pages = "75--90", year = "1995", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Norwegian Meteorol. Inst., Oslo, Norway", classification = "A9260X (Weather analysis and prediction); C4185 (Finite element analysis); C6110P (Parallel programming); C7340 (Geophysics computing)", keywords = "Algorithms; Atmosphere; Cray T3D SHMEM; DNMI; HIRLAM; Intel NX; Message passing; Meteorology; MPP; Numerical model; Parallel iterative Helmholtz solver; Parallel programming; PVM; Semi-implicit scheme; Semiimplicit scheme; Time integration scheme; Weather forecasting", thesaurus = "Digital simulation; Finite element analysis; Iterative methods; Message passing; Numerical analysis; Parallel processing; Parallel programming; Weather forecasting", } @InProceedings{Blaszczyk:1995:PCE, author = "A. Blaszczyk and Z. Andjelic and P. 
Levin and A. Ustundag", title = "Parallel computation of electric fields in a heterogeneous workstation cluster", crossref = "Hertzberger:1995:HPM", pages = "606--611", year = "1995", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Corp. Res., Asea Brown Boveri AG, Heidelberg, Germany", classification = "C5440 (Multiprocessing systems); C7310 (Mathematics computing); C7320 (Physics and chemistry computing); C7430 (Computer engineering)", keywords = "3D boundary element code; Benchmark problems; Design process; Dynamic load balancing; Electric fields; Heterogeneous workstation cluster; Parallel computation; PVM communication software", thesaurus = "Electric fields; Mathematics computing; Parallel processing; Physics computing; Virtual machines", } @InProceedings{Boianov:1995:DLC, author = "L. Boianov and I. Jelly", title = "Distributed logic circuit simulation on a network of workstations", crossref = "IEEE:1995:PEW", pages = "304--310", year = "1995", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Lab. for Distributed Syst. and Comput. Networks, Acad. of Sci., Sofia, Bulgaria", classification = "B1130B (Computer-aided circuit analysis and design); C5210B (Computer-aided logic design); C6150N (Distributed systems software); C7410D (Electronic engineering computing)", keywords = "Digital circuits; Distributed digital logic simulation; Logic circuit simulation; Logical simulation algorithms; Parallel Virtual Machine", thesaurus = "Digital simulation; Distributed processing; Logic CAD", } @InProceedings{Boryczko:1995:NIC, author = "I. Boryczko and J. Kitowski and J. Moscinski and A. 
Leszczynski", title = "Numerically intensive computing as a benchmark for parallel computer architectures", crossref = "Hertzberger:1995:HPM", pages = "118--123", year = "1995", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Inst. of Comput. Sci., Cracow, Poland", classification = "C4100 (Numerical analysis); C5220P (Parallel architecture); C5440 (Multiprocessing systems); C5470 (Performance evaluation and testing); C5620L (Local area networks); C5670 (Network performance); C7320 (Physics and chemistry computing)", keywords = "Computer network; Execution time; Multiprocessors; Numerically intensive computing; Parallel architectures; Parallel computer architecture benchmark; PVM environment; Vector supercomputers", thesaurus = "Local area networks; Molecular dynamics method; Multiprocessing systems; Parallel architectures; Performance evaluation; Physics computing; Vector processor systems", } @InProceedings{Branca:1995:CBH, author = "A. Branca and M. Ianigro and A. Distante", title = "A comparison between {HPF} and {PVM} for data parallel algorithms on a cluster of workstations using a high speed network", crossref = "Hertzberger:1995:HPM", pages = "930--931", year = "1995", bibdate = "Thu Feb 29 17:59:11 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Article{Briscolini:1995:PID, author = "M. 
Briscolini", title = "A parallel implementation of a {3-D} pseudospectral based code on the {IBM 9076} scalable {POWER} parallel system", journal = j-PARALLEL-COMPUTING, volume = "21", number = "11", pages = "1849--1862", day = "29", month = nov, year = "1995", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Mon Apr 14 12:05:41 MDT 1997", bibsource = "http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_free/browse/browse.cgi?year=1995&volume=21&issue=11; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_sub/browse/browse.cgi?year=1995&volume=21&issue=11&aid=1027", acknowledgement = ack-nhfb, classification = "A4725 (Turbulent flows, convection, and heat transfer); B0290Z (Other numerical methods); C4190 (Other numerical methods); C5440 (Multiprocessing systems); C7310 (Mathematics computing); C7320 (Physics and chemistry computing)", corpsource = "IBM ECSEC, Eur. Center for Sci. and Eng. 
Comput., Roma, Italy", fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", keywords = "3-D; 3-D pseudospectral based code; 9076 scalable POWERparallel system; architecture; computational kernels; computing; D FFTs; fast Fourier transforms; high intensive numerical simulations; homogeneous turbulent flows; IBM; implementations; mathematics computing; message; message passing; MPL; numerical analysis; parallel 3-; parallel distributed memory; parallel implementation; parallel interfaces; parallel processing; passing; physics; PVMe; turbulence", treatment = "A Application; P Practical", } @TechReport{Bruck:1995:EMPa, author = "Jehoshua Bruck", title = "Efficient message passing interface ({MPI}) for parallel computing on clusters of workstations", type = "Research report", number = "RJ 9925 (87305)", institution = inst-IBM-WATSON, address = inst-IBM-WATSON:adr, pages = "31", year = "1995", bibdate = "Mon Jan 15 15:32:53 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "Parallel computing on clusters of workstations and personal computers has very high potential, since it leverages existing hardware and software. Parallel programming environments offer the user a convenient way to express parallel computation and communication. In fact, recently, a Message Passing Interface (MPI) has been proposed as an industrial standard for writing `portable' message-passing parallel programs. The communication part of MPI consists of the usual point-to-point communication as well as collective communication. However, existing implementations of programming environments for clusters are built on top of a point-to-point communication layer (send and receive) over local area networks (LANs) and, as a result, suffer from poor performance in the collective communication part. 
In this paper, we present an efficient design and implementation of the collective communication part in MPI that is optimized for clusters of workstations. Our system consists of two main components: the MPI-CCL layer that includes the collective communication functionality of MPI and a User-level Reliable Transport Protocol (URTP) that interfaces with the LAN Data-link layer and leverages the fact that the LAN is a broadcast medium. Our system is integrated with the operating system via an efficient kernel extension mechanism that we developed. The kernel extension significantly improves the performance of our implementation as it can handle part of the communication overhead without involving user space. We have implemented our system on a collection of IBM RS/6000 workstations connected via a 10Mbit Ethernet LAN. Our performance measurements are taken from real scientific applications that run in a parallel mode by means of the MPI. The hypothesis behind our design is that system's performance will be bounded by interactions between the kernel and user space rather than by the bandwidth delivered by the LAN Data-Link Layer. 
Our results indicate that the performance of our MPI Broadcast (on top of Ethernet) is about twice as fast as a recently published software implementation of broadcast on top of ATM.", acknowledgement = ack-nhfb, annote = "December 13, 1995.", institutes = "IBM Research Division", keywords = "Computer interfaces", } @InProceedings{Bruck:1995:EMPb, author = "Jehoshua Bruck and Danny Dolev and Ching-Tien Ho and Marcel-Catalin Rosu and Ray Strong", title = "Efficient {Message Passing Interface} ({MPI}) for Parallel Computing on Clusters of Workstations", crossref = "ACM:1995:SAA", pages = "64--73", year = "1995", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "Compendex database; https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "An efficient design and implementation of the collective communication part in a Message Passing Interface (MPI) that is optimized for clusters of workstations is described. The system, which consists of two main components, the MPI-CCL layer and a User-level Reliable Transport Protocol (URTP), is integrated with the operating system via an efficient kernel extension mechanism. The system is then implemented on a collection of IBM RS\slash 6000 workstations connected via a 10Mbit Ethernet LAN. 
Results indicate that the performance of the MPI Broadcast (on top of Ethernet) is about twice as fast as a recently published software implementation of broadcast on top of ATM.", acknowledgement = ack-nhfb, affiliation = "California Inst of Technology", affiliationaddress = "Pasadena, CA, USA", classification = "716.1; 722.2; 722.3; 722.4; 723.1; C5470 (Performance evaluation and testing); C5610N (Network interfaces); C5620L (Local area networks); C5640 (Protocols); C5670 (Network performance); C6110P (Parallel programming); C6115 (Programming support); C6150N (Distributed systems software)", conference = "Proceedings of the 7th Annual ACM Symposium on Parallel Algorithms and Architectures, SPAA'95", conftitle = "Proceedings of Seventh Annual ACM Symposium on Parallel Algorithms and Architectures", corpsource = "California Inst. of Technol., Pasadena, CA, USA", journalabr = "Annu ACM Symp Parallel Algorithms Archit", keywords = "10 Mbit/s; application program interfaces; broadcast medium; Broadcast medium; collective communication; Collective communication; Communication overhead; communication overhead; Computer operating systems; Computer software portability; Computer systems programming; Computer workstations; Data communication systems; Ethernet; Ethernet LAN; IBM RS/6000 workstations; industrial standard; Industrial standard; Interfaces (computer); Kernel extension mechanism; kernel extension mechanism; LAN Data link-layer; LAN data-link layer; Local area networks; local area networks; message passing; Message passing interface; message passing interface; MPI CCL layer; MPI-CCL layer; network interfaces; Network protocols; operating system; Operating system; parallel computing; Parallel computing; Parallel processing systems; Parallel programming; parallel programming; Performance; performance; performance evaluation; Personal computers; Point-to-point communication; 
point-to-point communication; portable message-passing parallel programs; Portable message-passing parallel programs; Program processors; programming environments; Programming environments; scientific programs; Scientific programs; software libraries; software portability; Systems analysis; transport protocols; URTP; user space; User space; User-level reliable transport protocol; user-level reliable transport protocol; workstation clusters; Workstation clusters; workstations", meetingaddress = "Santa Barbara, CA, USA", meetingdate = "Jul 17--19 1995", meetingdate2 = "07/17--19/95", numericalindex = "Bit rate 1.0E+07 bit/s", sponsor = "ACM SIGACT; ACM SIGARCH; EATCS", sponsororg = "ACM; EATCS", thesaurus = "Application program interfaces; Local area networks; Message passing; Network interfaces; Parallel programming; Performance evaluation; Programming environments; Software libraries; Software portability; Transport protocols; Workstations", treatment = "P Practical", } @InProceedings{Bubeck:1995:DSC, author = "T. Bubeck and M. Hiller and W. Kuchlin and W. Rosenstiel", title = "Distributed symbolic computation with {DTS}", crossref = "Ferreira:1995:PAI", pages = "231--248", year = "1995", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Wilhelm-Schickard-Inst. 
fur Inf., Tubingen Univ., Germany", classification = "C4130 (Interpolation and function approximation); C4240P (Parallel programming and algorithm theory); C6110P (Parallel programming); C6115 (Programming support); C6130S (Data security); C6150N (Distributed systems software)", keywords = "Anonymous compute servers; Asynchronous RPC abstraction; C threads interface; Cryptosystem; Distributed symbolic computation; Distributed threads system; DTS; Fork/join parallel programming; Highly data-dependent algorithm parallelisation; Irregular algorithm parallelisation; Multiprocessor workstation; Multithreading; Parallel long integer multiplication; Parallel multi-variate polynomial resultant computation; Performance results; Programming environment; PVM; Shared memory threads", thesaurus = "Arithmetic; Cryptography; Distributed memory systems; Multiprocessing programs; Multiprocessing systems; Parallel algorithms; Parallel programming; Polynomials; Programming environments; Remote procedure calls; Shared memory systems; Software performance evaluation; Symbol manipulation; Workstations", } @Article{Bunge:1995:MCM, author = "Hans-Peter Bunge and John R. Baumgardner", title = "Mantle convection modeling on parallel virtual machines", journal = j-COMPUT-PHYS, volume = "9", number = "2", pages = "207--??", month = mar, year = "1995", CODEN = "CPHYE2", DOI = "https://doi.org/10.1063/1.168525", ISSN = "0894-1866 (print), 1558-4208 (electronic)", ISSN-L = "0894-1866", bibdate = "Wed Apr 10 08:45:53 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/computphys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", URL = "https://aip.scitation.org/doi/10.1063/1.168525", acknowledgement = ack-nhfb, ajournal = "Comput. Phys", fjournal = "Computers in Physics", journal-URL = "https://aip.scitation.org/journal/cip", } @InProceedings{Carreira:1995:DEL, author = "J. Carreira and L. Silva and J. G. 
Silva", title = "On the design of {Eilean}: a {Linda-like} library for {MPI}", crossref = "IEEE:1995:PSP", pages = "175--184", year = "1995", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Coimbra Univ., Portugal", classification = "C6110B (Software engineering techniques); C6110P (Parallel programming); C6115 (Programming support); C6140D (High level languages)", conftitle = "Proceedings Scalable Parallel Libraries Conference", corpsource = "Coimbra Univ., Portugal", keywords = "access policies; Access policies; communication system; Communication system; distribution policies; Distribution policies; Eilean; hierarchical distribution; Hierarchical distribution; hierarchical partitioning scheme; Hierarchical partitioning scheme; Linda; Linda-like library; message passing; message passing standard; Message passing standard; MPI; parallel languages; parallel library; Parallel library; parallel programming; programming paradigm; Programming paradigm; run-time system; Run-time system; software libraries; software library; Software library; software portability; Software portability; tuple mapping task; Tuple mapping task; tuple space; Tuple space", sponsororg = "Mississippi State Univ.; NSF", thesaurus = "Message passing; Parallel languages; Parallel programming; Software libraries; Software portability", treatment = "P Practical", } @TechReport{Casanova:1995:PPM, author = "Henri Casanova and Jack Dongarra and Weicheng Jiang", title = "The Performance of {PVM} on {MPP} Systems", type = "Technical report", institution = inst-UTK, address = inst-UTK:adr, month = aug, year = "1995", bibdate = "Tue Feb 26 10:10:44 2002", bibsource = "https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.netlib.org/utk/papers/pvmmpp.ps; http://www.netlib.org/utk/papers/pvmmpp/pvmmpp.html; 
http://www.netlib.org/utk/people/JackDongarra/pdf/pvmmpp.pdf", acknowledgement = ack-nhfb, } @Article{Casas:1995:MMT, author = "Jeremy Casas and Dan L. Clark and Ravi Konuru and Steve W. Otto and Robert M. Prouty and Jonathan Walpole", title = "{MPVM}: a Migration Transparent Version of {PVM}", journal = j-COMP-SYS, volume = "8", number = "2", pages = "171--216", month = "Spring", year = "1995", CODEN = "CMSYE2", ISSN = "0895-6340", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Oregon Graduate Inst. of Sci. and Technol., Beaverton, OR, USA", classification = "C5440 (Multiprocessing systems); C6150N (Distributed systems software); C7430 (Computer engineering)", corpsource = "Oregon Graduate Inst. of Sci. and Technol., Beaverton, OR, USA", fjournal = "Computing Systems", keywords = "Dynamic process migration; dynamic process migration; general-; General-purpose workstation environments; Idle-cycles; idle-cycles; message passing; message-; Message-passing parallel machine; Migratable PVM; Migration-transparent version; migration-transparent version; MPVM; Off-loading; off-loading; Parallel computations; parallel computations; parallel machines; parallel programming; Parallel Virtual Machine; passing parallel machine; Performance; performance; purpose workstation environments; software performance evaluation; Unix; UNIX-based computers; virtual machines; workstations", thesaurus = "Message passing; Parallel machines; Parallel programming; Software performance evaluation; Unix; Virtual machines; Workstations", treatment = "P Practical", } @InProceedings{Cavender:1995:APN, author = "M. E. 
Cavender and Xiaodong Zhang",
  title =        "Asynchronous {PVM} Network Computing",
  crossref =     "Bailey:1995:PSS",
  pages =        "772--773",
  year =         "1995",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "High Performance Comput. and Software Lab., Texas Univ., San Antonio, TX, USA",
  classification = "C5620L (Local area networks); C5640 (Protocols); C6150N (Distributed systems software)",
  corpsource =   "High Performance Comput. and Software Lab., Texas Univ., San Antonio, TX, USA",
  keywords =     "asynchronous PVM network computing; Asynchronous PVM network computing; blocking; Blocking; incoming message buffer; Incoming message buffer; interrupt; Interrupt; local area networks; message passing; Message passing; performance penalty; Performance penalty; processors; Processors; program; Program; protocols; PVM daemon; receiver; Receiver; stop and wait protocol; Stop and wait protocol; synchronized operation; Synchronized operation; user program; User program",
  thesaurus =    "Local area networks; Message passing; Protocols",
  treatment =    "T Theoretical or Mathematical",
}

@InProceedings{Cavender:1995:SSA,
  author =       "Mark E. Cavender and Xiaodong Zhang",
  title =        "Software support for asynchronous computing across networks",
  crossref =     "IEEE:1995:PNA",
  pages =        "376--382",
  year =         "1995",
  CODEN =        "PSICD2",
  ISSN =         "0730-6512",
  bibdate =      "Fri May 24 09:58:00 MDT 1996",
  bibsource =    "Compendex database; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note =         "IEEE catalog number 95CB35838.",
  abstract =     "This paper describes the design and implementation of asynchronous communication library routines for distributed computing across networks of workstations. The new system is based on modifications of the existing PVM message-passing environment.
An intensive and comparative study of synchronous, asynchronous and non-blocking communication protocols is addressed in terms of their design, implementation and applications. Experimental performance comparisons of an application program using the three communication protocols on a network of workstations, are also presented. The experimental results show the power of the asynchronous communication library and the effective enhancements of the PVM message-passing environment.",
  acknowledgement = ack-nhfb,
  affiliation =  "Univ of Texas at San Antonio",
  affiliationaddress = "San Antonio, TX, USA",
  classification = "722.2; 722.4; 723.1; 723.5; C5620L (Local area networks); C5640 (Protocols); C6110B (Software engineering techniques); C6115 (Programming support); C6150N (Distributed systems software)",
  conference =   "Proceedings of the 19th Annual International Computer Software and Applications Conference COMPSAC '95",
  journalabr =   "Proc IEEE Comput Soc Int Comput Software Appl Conf",
  keywords =     "Application program; Asynchronous communication library routine design; Asynchronous communication library routines; Asynchronous communication protocols; Asynchronous computing; Computer aided software engineering; Computer networks; Computer workstations; Data communication systems; Distributed computer systems; Distributed computing; Modified PVM message-passing environment; Network protocols; Nonblocking communication protocols; Parallel virtual machine (PVM) message passing environment; Performance comparisons; Software support; Synchronous communication protocols; Workstation network",
  meetingaddress = "Dallas, TX, USA",
  meetingdate =  "Aug 9--11 1995",
  meetingdate2 = "08/09--11/95",
  sponsor =      "IEEE",
  thesaurus =    "Local area networks; Message passing; Network operating systems; Operating systems [computers]; Protocols; Software libraries; Software performance evaluation; Workstations",
}

@InProceedings{Chamaret:1995:PFE,
  author =       "B. Chamaret and H. Cherefi and S.
Ubeda",
  title =        "Parallel filter estimation maximisation algorithm for segmentation on a {LAN} of workstation",
  crossref =     "Bailey:1995:PSS",
  pages =        "68--69",
  year =         "1995",
  bibdate =      "Sun Dec 22 10:19:23 MST 1996",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "TSI Lab., Univ. Jean-Monnet, Saint-Etienne, France",
  classification = "B6140C (Optical information, image and video signal processing); B6210L (Computer communications); C1250 (Pattern recognition); C4240P (Parallel programming and algorithm theory); C5260B (Computer vision and image processing techniques); C5620L (Local area networks)",
  keywords =     "Bayesian segmentation algorithm; Grey level images; Image segmentation; LAN of workstation; Parallel filter estimation maximisation algorithm; Parallel Virtual Machine package; Portable parallel application",
  thesaurus =    "Bayes methods; Image segmentation; Local area networks; Parallel algorithms",
}

@InProceedings{Chang:1995:EPCa,
  author =       "S.-L. Chang and D. H. C. Du and J. Hsieh and M. Lin",
  title =        "Enhanced {PVM} Communications Over a High-Speed Local Area Network",
  crossref =     "Alnuweiri:1995:PHF",
  pages =        "37--46",
  year =         "1995",
  bibdate =      "Thu Feb 29 17:59:11 MST 1996",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}

@Article{Chang:1995:EPCb,
  author =       "Sheue-Ling Chang and David Hung-Chang Du and Jenwei Hsieh and Rose P.
Tsang and Mengjou Lin",
  title =        "Enhanced {PVM} Communications over a {High-Speed LAN}",
  journal =      j-IEEE-PAR-DIST-TECH,
  volume =       "3",
  number =       "3",
  pages =        "20--32",
  month =        "Fall",
  year =         "1995",
  CODEN =        "IPDTEX",
  DOI =          "https://doi.org/10.1109/M-PDT.1995.414841",
  ISSN =         "1063-6552 (print), 1558-1861 (electronic)",
  ISSN-L =       "1063-6552",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "Compendex database; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract =     "Performance results of PVM over a local ATM network show the availability of much greater communication bandwidth over traditional LANs such as Ethernet. Realizing the full potential of high-speed networks, therefore, will require further improvements in both hardware and software components of network I/O subsystems.",
  abstract2 =    "Enhanced Parallel Virtual Machine (PVM) communications over a high speed local area network is described. Performance results of PVM over a local asynchronous transfer mode (ATM) show the availability of much greater communication bandwidth over traditional LANs. Application-level performance, however, still lags far behind the capabilities of the physical medium. Realizing the full potential of high-speed networks, therefore, will require further improvements in both hardware and software components of network input\slash output subsystems.",
  acknowledgement = ack-nhfb,
  affiliation =  "Dept. of Comput. Sci., Univ. of Minnesota",
  affiliationaddress = "Minneapolis, MN, USA",
  classification = "716; 722.2; 722.3; 722.4; 723; 731; B6210L (Computer communications); B6230 (Switching centres and equipment); C5620L (Local area networks); C5670 (Network performance)",
  corpsource =   "Dept. of Comput. Sci., Minnesota Univ., Minneapolis, MN, USA",
  fjournal =     "IEEE parallel and distributed technology: systems and applications",
  journalabr =   "IEEE Parallel Distrib Technol",
  keywords =     "application-level performance; Application-level performance; Asynchronous transfer mode; asynchronous transfer mode; Communication bandwidth; communication bandwidth; Computer architecture; Data communication systems; evaluation; Fiber distributed data interface; high-speed LAN; High-speed LAN; high-speed networks; High-speed networks; Interfaces (computer); Local area networks; local area networks; local ATM network; Local ATM network; Multicasting measurements; Network I/O subsystems; network I/O subsystems; Parallel processing systems; Parallel virtual machine (PVM); Parallel virtual machine (pvm); Performance; performance; PVM communications; Systems analysis; systems analysis",
  thesaurus =    "Asynchronous transfer mode; Local area networks; Performance evaluation",
  treatment =    "A Application; P Practical",
}

@InProceedings{Chapple:1995:PUL,
  author =       "S. R. Chapple and L. J. Clarke",
  title =        "The {Parallel Utilities Library}",
  crossref =     "IEEE:1995:PSP",
  pages =        "21--30",
  year =         "1995",
  bibdate =      "Sun Dec 22 10:20:45 MST 1996",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Parallel Comput.
Center, Edinburgh Univ., UK",
  classification = "C5440 (Multiprocessing systems); C6110B (Software engineering techniques); C6110P (Parallel programming); C6150N (Distributed systems software); C7480 (Production engineering computing)",
  keywords =     "AEA Technology; Domain decomposition; Industrial applications; Library modules; Message passing; Message-Passing Interface; MPI; Parallel scalable I/O; Parallel systems; Parallel Utilities Library; PUL; Rolls-Royce; Shell UK; Task parallelism; Unstructured mesh applications",
  thesaurus =    "Industries; Message passing; Parallel programming; Software libraries; Software portability; Software reusability; Subroutines",
}

@InProceedings{Clematis:1995:PPH,
  author =       "A. Clematis and B. Falcidieno and D. F. Prieto and M. Spagnuolo",
  title =        "Parallel processing on heterogeneous networks for {GIS} applications",
  crossref =     "Hertzberger:1995:HPM",
  pages =        "67--72",
  year =         "1995",
  bibdate =      "Sun Dec 22 10:19:23 MST 1996",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "IMA-CNR, Genoa, Italy",
  classification = "C6110B (Software engineering techniques); C6110P (Parallel programming); C6150N (Distributed systems software); C7840 (Geography and cartography computing)",
  keywords =     "Applications parallelization; Geographic information systems; Heterogeneous networks; Linda; Network-based parallel computing; Parallel program development; Performance; Portable communication libraries; PVM; Software portability; Software reusability",
  thesaurus =    "Geographic information systems; Parallel processing; Software libraries; Software portability; Software reusability",
}

@InProceedings{Clemencon:1995:AEP,
  author =       "C. Clemencon and A. Endo and J. Fritscher and A. Muller and R. R{\"u}hl and B. J. N. Wylie",
  title =        "The 'Annai' environment for portable distributed parallel programming",
  crossref =     "El-Rewini:1995:PTE",
  pages =        "242--251 (vol.
2)",
  year =         "1995",
  bibdate =      "Sun Dec 22 10:20:45 MST 1996",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Swiss Federal Inst. of Technol., Zurich, Switzerland",
  classification = "C6110B (Software engineering techniques); C6110P (Parallel programming); C6115 (Programming support); C6150C (Compilers, interpreters and other processors); C6150G (Diagnostic, testing, debugging and evaluating systems); C6150N (Distributed systems software); C6180 (User interfaces)",
  keywords =     "Annai environment; Application developers; Common user interface; Distributed memory parallel processors; Dynamic data distributions; Feedback; Functionality enhancements; High Performance Fortran compiler; High-level data-parallel programming; Interactive performance monitor; Language extensions; Low-level machine interface; Low-level message-passing programming; Message Passing Interface; Performance analyzer; Performance results; Portability; Portable distributed parallel programming environment; Source-level debugger; Target hardware architecture; Tool prototypes; Unstructured problem parallelization",
  thesaurus =    "Distributed memory systems; FORTRAN; Message passing; Parallel programming; Program compilers; Program debugging; Program diagnostics; Programming environments; Software performance evaluation; Software portability; Software tools; User interfaces",
}

@InProceedings{Clemencon:1995:IRD,
  author =       "C. Clemencon and J. Fritscher and M. J. Meehan and R. R{\"u}hl",
  title =        "An Implementation of Race Detection and Deterministic Replay with {MPI}",
  crossref =     "Haridi:1995:EPP",
  pages =        "155--166",
  year =         "1995",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Centro Svizzero de Calcolo Sci., Eidgenossische Tech. Hochschule, Manno, Switzerland",
  classification = "C6110P (Parallel programming); C6110S (Software metrics); C6115 (Programming support); C6140D (High level languages); C6150C (Compilers, interpreters and other processors)",
  conftitle =    "EURO-PAR '95. Parallel Processing. First International EURO-PAR Conference. Proceedings",
  corpsource =   "Centro Svizzero de Calcolo Sci., Eidgenossische Tech. Hochschule, Manno, Switzerland",
  keywords =     "Annai programming environment; Computational efficiency; computational efficiency; Data-parallel program; data-parallel program; Deterministic replay; deterministic replay; FORTRAN; High Performance Fortran; HPF; Integrated environment; integrated environment; Joint CSCS-ETH/NEC Collaboration; message passing; Message-passing program; message-passing program; MPI; Parallel Debugging Tool; Parallel language; parallel language; parallel programming; parallelising compilers; Parallelized MPI program; parallelized MPI program; PDT; Program debugging; program debugging; Programming environment; programming environment; programming environments; programming languages; Race detection; race detection; Replaying mechanism; replaying mechanism; software metrics; Software performance; software performance; software performance evaluation; Software tool; software tool; Tracing; tracing",
  thesaurus =    "FORTRAN; Message passing; Parallel programming; Parallelising compilers; Program debugging; Programming environments; Programming languages; Software metrics; Software performance evaluation",
  treatment =    "P Practical",
}

@InProceedings{Cooperman:1995:SBP,
  author =       "G. Cooperman",
  title =        "{STAR\slash MPI}: binding a parallel library to interactive symbolic algebra systems",
  crossref =     "Levelt:1995:IIS",
  pages =        "126--132",
  year =         "1995",
  bibdate =      "Sun Dec 22 10:19:23 MST 1996",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Coll. of Comput.
Sci., Northeastern Univ., Boston, MA, USA",
  classification = "C6110B (Software engineering techniques); C6110P (Parallel programming); C6115 (Programming support); C7310 (Mathematics computing)",
  keywords =     "GCL; GNU Common LISP; Interactive symbolic algebra systems; Mathematical group theory; Parallel library; STAR/MPI; Symbolic algebra",
  thesaurus =    "Parallel programming; Software libraries; Symbol manipulation",
}

@InProceedings{Cooperman:1995:SMB,
  author =       "Gene Cooperman",
  title =        "{STAR\slash MPI}: Binding a Parallel Library to Interactive Symbolic Algebra Systems",
  crossref =     "Levelt:1995:IIS",
  pages =        "126--132",
  year =         "1995",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "Compendex database; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract =     "This work is aimed at making parallel programming more accessible to users of symbolic algebra systems and to users of interactive languages in general. This is done by integrating MPI (Message Passing Interface), a portable, parallel message-passing library, with two interactive languages: GCL (GNU Common LISP), and GAP. The GAP system includes a general purpose language for mathematical group theory, and LISP is the basis for several general-purpose symbolic algebra systems. In addition, a simple master-slave abstraction is written, so that end-users need not learn any of the details of the MPI function calls.
This work is distinct from past studies in that it provides the ability to interactively create, test and modify a distributed environment using the original interactive language and a portable parallel library.",
  acknowledgement = ack-nhfb,
  affiliation =  "Northeastern Univ",
  affiliationaddress = "Boston, MA, USA",
  classification = "721.1; 722.2; 722.4; 723.1; 723.5; 921.1; C6110B (Software engineering techniques); C6110P (Parallel programming); C6115 (Programming support); C7310 (Mathematics computing)",
  conference =   "Proceedings of the 1995 International Symposium on Symbolic and Algebraic Computation",
  conftitle =    "Proceedings of International Symposium on Symbolic and Algebraic Computation. ISSAC '95",
  corpsource =   "Coll. of Comput. Sci., Northeastern Univ., Boston, MA, USA",
  journalabr =   "Int Symp Symbol Algebraic Comput ISSAC Proc",
  keywords =     "Algebra; Computational methods; Computer programming; Computer programming languages; Computer simulation; Computer software; GCL; GNU Common LISP; Interactive computer systems; Interactive languages; Interactive symbolic algebra systems; interactive symbolic algebra systems; Interfaces (computer); mathematical group theory; Mathematical techniques; Message passing interface; Parallel library; parallel library; Parallel processing systems; parallel programming; software libraries; STAR/MPI; symbol manipulation; symbolic algebra; User interfaces",
  meetingaddress = "Montreal, Can",
  meetingdate =  "Jul 10--12 1995",
  meetingdate2 = "07/10--12/95",
  sponsororg =   "ACM",
  treatment =    "P Practical; T Theoretical or Mathematical",
}

@InProceedings{Corno:1995:PTA,
  author =       "F. Corno and P. Prinetto and M. Rebaudengo and M. {Sonza Reorda} and E.
Veiluva",
  title =        "A {PVM} tool for automatic test generation on parallel and distributed systems",
  crossref =     "Hertzberger:1995:HPM",
  pages =        "39--44",
  year =         "1995",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Dipartimento di Autom. e Inf., Politecnico di Torino, Italy",
  classification = "B1130B (Computer-aided circuit analysis and design); B2210B (Printed circuit layout and design); C5210B (Computer-aided logic design); C6110P (Parallel programming); C6150N (Distributed systems software); C7410D (Electronic engineering computing)",
  corpsource =   "Dipartimento di Autom. e Inf., Politecnico di Torino, Italy",
  keywords =     "algorithm; ATPG; automatic test generation; Automatic test generation; automatic test pattern; Automatic test pattern generation; automatic test software; circuit CAD; CM-5; DEC Alpha AXP farm; distributed programming; Distributed programming; distributed system; Distributed system; efficient algorithm; Efficient algorithm; electric circuit; Electric circuit; electrical circuit; Electrical circuit; electronic CAD; Electronic CAD; electronic circuit; Electronic circuit; GATTO*; generation; genetic; Genetic algorithm; integrated circuit; large sequential circuits; Large sequential circuits; logic CAD; logic testing; parallel; parallel architectures; parallel programming; Parallel programming; portability; Portability; portable message-passing libraries; Portable message-passing libraries; programming; PVM tool; software libraries; testing; VLSI; VLSI technology",
  pubcountry =   "Germany",
  thesaurus =    "Automatic test software; Circuit CAD; Integrated circuit testing; Logic CAD; Logic testing; Parallel architectures; Parallel programming; Software libraries; VLSI",
  treatment =    "P Practical",
  xxauthor =     "F. Corno and P. Prinetto and M. Rebaudeng and M. {Sonza Reorda} and E. Veiluva",
}

@Article{DAmbra:1995:CBC,
  author =       "P. D'Ambra and G.
Giunta",
  title =        "Concurrent banded {Cholesky} factorization on workstation networks using {PVM}",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "21",
  number =       "3",
  pages =        "487--494",
  day =          "10",
  month =        mar,
  year =         "1995",
  CODEN =        "PACOEJ",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Dipartimento di Matematica e Applicazioni, Naples Univ., Italy",
  classification = "C4140 (Linear algebra); C4240P (Parallel programming and algorithm theory); C6110P (Parallel programming); C7310 (Mathematics computing)",
  corpsource =   "Dipartimento di Matematica e Applicazioni, Naples Univ., Italy",
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
  keywords =     "application programs; Application programs; banded symmetric positive-definite matrix; Banded symmetric positive-definite matrix; cluster of workstations; Cluster of workstations; concurrent banded Cholesky factorization; Concurrent banded Cholesky factorization; heterogeneous processors; Heterogeneous processors; linear algebra; mathematics computing; optical fiber links; Optical fiber links; parallel programming; Parallel Virtual Machine; software system; Software system; virtual machines; workstation networks; Workstation networks; workstations",
  pubcountry =   "Netherlands",
  thesaurus =    "Linear algebra; Mathematics computing; Parallel programming; Virtual machines; Workstations",
  treatment =    "A Application; P Practical",
}

@InProceedings{Davies:1995:NPE,
  author =       "Gregory Davies and Norman Matloff",
  title =        "Network-Specific Performance Enhancements for {PVM}",
  crossref =     "IEEE:1995:PFI",
  pages =        "205--210",
  year =         "1995",
  bibdate =      "Sun Dec 22 10:19:23 MST 1996",
  bibsource =    "Compendex database; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract =     "PVM, a message-passing software system for
parallel processing, is used on a wide variety of processor platforms, but this portability restricts execution speed. The work here will address this problem mainly in the context of Ethernet-based systems, proposing two PVM enhancements for such systems. The first enhancement exploits the fact that an Ethernet has broadcast capability. Since unenhanced PVM must, to keep portability, avoid using broadcast, execution speed is sacrificed. In addition, the larger the system, the larger the sacrifice in speed. A solution to this problem is presented. The second enhancement is intended for use in applications in which many concurrent tasks finish at the same time, and thus simultaneously try to transmit to a master process. On an Ethernet, this produces excessively long random backoffs, reducing program speed. An enhancement, termed `programmed backoff,' is proposed.",
  acknowledgement = ack-nhfb,
  affiliation =  "Tandem Computers",
  affiliationaddress = "Cupertino, CA, USA",
  classification = "716.1; 722; 722.3; 722.4; 723; 922.2; C5440 (Multiprocessing systems); C5620L (Local area networks); C6150N (Distributed systems software)",
  conference =   "Proceedings of the 4th IEEE International Symposium on High Performance Distributed Computing",
  journalabr =   "IEEE Int Symp High Perform Distrib Comput Proc",
  keywords =     "Algorithms; Broadcast capability; Broadcasting; Communication channels (information theory); Computer hardware; Computer networks; Computer software portability; Concurrent tasks; Data communication systems; Ethernet-based systems; Hypercube systems; Network-specific performance enhancements; Message-passing software system; Parallel processing; Parallel processing systems; Program speed; Programmed backoff; PVM; Statistical methods",
  meetingaddress = "Washington, DC, USA",
  meetingdate =  "Aug 2--4 1995",
  meetingdate2 = "08/02--04/95",
  sponsor =      "IEEE",
  thesaurus =    "Local area networks; Message passing; Parallel processing",
}

@InProceedings{Davies:1995:NSP,
  author
=                "G. Davies and N. Matloff",
  title =        "Network-specific performance enhancements for {PVM}",
  crossref =     "IEEE:1995:PFI",
  pages =        "205--210",
  year =         "1995",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C5440 (Multiprocessing systems); C5620L (Local area networks); C6150N (Distributed systems software)",
  corpsource =   "Tandem Comput. Inc., Cupertino, CA, USA",
  keywords =     "broadcast capability; concurrent tasks; Ethernet-based; local area networks; message passing; message-passing; network-specific performance enhancements; parallel processing; program; programmed backoff; PVM; software system; speed; systems",
  sponsororg =   "IEEE Tech. Committee on Distrib. Process.; Northeast Parallel Architectures Centre (NPAC) at Syracuse Univ.; ACM SIGCOMM; Rome Lab",
  treatment =    "A Application; P Practical",
}

@InProceedings{Decker:1995:TDU,
  author =       "T. Decker and R. Diekmann and R. Luling and B. Monien",
  title =        "Towards developing universal dynamic mapping algorithms",
  crossref =     "IEEE:1995:PSI",
  pages =        "456--459",
  year =         "1995",
  bibdate =      "Sun Dec 22 10:19:23 MST 1996",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Dept. of Math. and Comput. Sci., Paderborn Univ., Germany",
  classification = "C5220P (Parallel architecture); C5620 (Computer networks and techniques); C6150J (Operating systems)",
  keywords =     "Bidding-algorithms; Distributed runtime systems; Dynamically generated tasks; Execution-times; MIMD-system; MPI; Optimal K-values; PVM; Randomly selected processors; Universal dynamic mapping algorithms; Universally applicable strategy",
  thesaurus =    "Distributed processing; Resource allocation",
}

@TechReport{Dongarra:1995:IMS,
  author =       "Jack Dongarra and Steve W.
Otto and Marc Snir and David Walker",
  title =        "An Introduction to the {MPI Standard}",
  type =         "Technical report",
  number =       "CS-95-274",
  institution =  inst-UTK,
  address =      inst-UTK:adr,
  month =        jan,
  year =         "1995",
  bibdate =      "Tue Feb 26 10:10:44 2002",
  bibsource =    "https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note =         "Appears in CACM \cite{Dongarra:1996:MPS}.",
  URL =          "http://www.netlib.org/tennessee/ut-cs-95-274.ps; http://www.netlib.org/utk/papers/intro-mpi/intro-mpi.html; http://www.netlib.org/utk/people/JackDongarra/pdf/ut-cs-95-274.pdf",
  acknowledgement = ack-nhfb,
}

@Article{Dongarra:1995:PBC,
  author =       "J. J. Dongarra and T. Hey",
  title =        "The {ParkBench} benchmark collection",
  journal =      j-SUPERCOMPUTER,
  volume =       "11",
  number =       "2--3",
  pages =        "94--114",
  month =        jun,
  year =         "1995",
  CODEN =        "SPCOEL",
  ISSN =         "0168-7875",
  bibdate =      "Sun Dec 22 10:19:23 MST 1996",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Dept. of Comput. Sci., Tennessee Univ., Knoxville, TN, USA",
  classification = "C5440 (Multiprocessing systems); C6150C (Compilers, interpreters and other processors)",
  fjournal =     "Supercomputer",
  keywords =     "Application kernels; Compact research applications; Hierarchical structure; Low-level benchmarks; ParkBench benchmark collection; Performance characteristics; Synthetic compiler benchmark suite",
  pubcountry =   "Netherlands",
  thesaurus =    "Parallel processing; Program compilers",
}

@InProceedings{Dowaji:1995:LBS,
  author =       "S. Dowaji and C. Roucairol",
  title =        "Load balancing strategy and priority of tasks in distributed environments",
  crossref =     "IEEE:1995:CPI",
  pages =        "15--22",
  year =         "1995",
  bibdate =      "Sun Dec 22 10:19:23 MST 1996",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Lab. PRiSM, Univ.
de Versailles-St-Quentin, France",
  classification = "C1160 (Combinatorial mathematics); C1180 (Optimisation techniques); C4240P (Parallel programming and algorithm theory); C6150N (Distributed systems software)",
  keywords =     "Branch and bound algorithms; Distributed environments; Graph theory; Load balancing; Lower bound; VCP",
  thesaurus =    "Combinatorial mathematics; Distributed algorithms; Optimisation; Resource allocation",
}

@Article{Dragovitsch:1995:PPS,
  author =       "P. Dragovitsch and X. Zhao and L. C. Dennis and G. A. Riccardi",
  title =        "{PVMGeant} --- a Parallel Simulation Code for the {CLAS} Detector at {CEBAF}",
  journal =      j-IJSAHPC,
  volume =       "9",
  number =       "2",
  pages =        "128--137",
  month =        "Summer",
  year =         "1995",
  CODEN =        "IJSCFG",
  ISSN =         "1078-3482",
  bibdate =      "Tue Feb 18 09:07:32 MST 1997",
  bibsource =    "Compendex database; https://www.math.utah.edu/pub/tex/bib/pvm.bib; UnCover library database",
  abstract =     "Due to the need for extensive and detailed simulations of the CEBAF Large Acceptance Spectrometer (CLAS), the Monte-Carlo code CLASGeant was transferred to a heterogeneous computing cluster and has been linked to the Parallel Virtual Machine (PVM) message-passing library. The resulting simulation package, PvmGeant, achieves an almost linear speedup in physics-event simulation. This article describes modifications to the original GEANT code, its integration with PVM, and performance tests that were conducted at the computing cluster at The Supercomputing Computations Research Institute at Florida State University. Particular attention has been given to measuring the effect of different data structures on the cost of network communication between nodes.",
  acknowledgement = ack-nhfb,
  affiliation =  "Supercomput. Comput. Res.
Inst., Florida State Univ.",
  affiliationaddress = "Tallahassee, FL, USA",
  classification = "722.4; 723.1; 723.2; 723.5; 922.2; 941.3",
  fjournal =     "International Journal of Supercomputer Applications and High Performance Computing",
  journalabr =   "Int J Supercomput Appl High Perform Comput",
  keywords =     "CEBAF large acceptance spectrometer (CLAS); Computational complexity; Computer networks; Computer simulation; Computer software; Computing cluster; Continuous electron beam accelerator facility (CEBAF); Data communication systems; Data structures; Message passing library; Monte Carlo methods; Parallel processing systems; Parallel virtual machine; Software package CLASGeant; Software package PvmGeant; Spectrometers",
}

@InProceedings{Edjlali:1995:DPP,
  author =       "G. Edjlali and G. Agrawal and A. Sussman and J. Saltz",
  title =        "Data parallel programming in an adaptive environment",
  crossref =     "IEEE:1995:PIP",
  pages =        "827--832",
  year =         "1995",
  bibdate =      "Sun Dec 22 10:20:45 MST 1996",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Dept. of Comput. Sci., Maryland Univ., College Park, MD, USA",
  classification = "C6110P (Parallel programming); C6115 (Programming support)",
  keywords =     "Adaptive environment; Communication patterns; Data parallel programming; Data redistribution; Loop bounds; Message passing; Multiblock Navier--Stokes solver; Network of workstations; Performance results; PVM; Runtime library; Runtime support",
  thesaurus =    "Message passing; Parallel programming; Programming environments",
}

@Article{Fan:1995:DMP,
  author =       "W. C. Fan and J. A.
{Halbleib, Sr.}",
  title =        "Distributed multitasking {ITS} with {PVM}",
  journal =      j-TRANS-AM-NUCL-SOC,
  volume =       "72",
  number =       "????",
  pages =        "146--147",
  month =        "????",
  year =         "1995",
  CODEN =        "TANSAO",
  ISSN =         "0003-018X",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Sandia Nat. Labs., Albuquerque, NM, USA",
  classification = "A0250 (Probability theory, stochastic processes, and statistics); A0540 (Fluctuation phenomena, random processes, and Brownian motion); A2820H (Neutron diffusion); C1140G (Monte Carlo methods); C7470 (Nuclear engineering computing)",
  conflocation = "Philadelphia, PA, USA; 25--29 June 1995",
  conftitle =    "1995 Annual Meeting of American Nuclear Society (papers in summary form only received)",
  corpsource =   "Sandia Nat. Labs., Albuquerque, NM, USA",
  fjournal =     "Transactions of the American Nuclear Society",
  keywords =     "distributed multitasking ITS; Distributed multitasking ITS; ITS Version 3.0; Monte Carlo methods; neutron transport theory; nuclear engineering computing; PVM communication software; transport codes; Transport codes",
  thesaurus =    "Monte Carlo methods; Neutron transport theory; Nuclear engineering computing",
  treatment =    "P Practical; T Theoretical or Mathematical",
}

@InProceedings{Fang:1995:PMS,
  author =       "Niandong Fang and H. Burkhart",
  title =        "{PEMPI} --- from {MPI} standard to programming environment",
  crossref =     "IEEE:1995:PSP",
  pages =        "31--38",
  year =         "1995",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Dept. of Inf., Basel Univ., Switzerland",
  classification = "C5440 (Multiprocessing systems); C6110P (Parallel programming); C6115 (Programming support); C6150N (Distributed systems software)",
  conftitle =    "Proceedings Scalable Parallel Libraries Conference",
  corpsource =   "Dept.
of Inf., Basel Univ., Switzerland", keywords = "Basel Algorithm Classification Scheme; higher abstractions; Higher abstractions; integrated environment; Integrated environment; large scale message passing applications; Large scale message passing applications; machine best-fit implementation; Machine best-fit implementation; message passing; Message Passing Interface; message passing programs; Message passing programs; message passing systems; Message passing systems; MPI standard; parallel programming; parallel programs; Parallel programs; PEMPI; portability; Portability; programmability; Programmability; programmer oriented abstractions; Programmer oriented abstractions; programming environment; Programming environment; programming environments; software standards; software tools; system- oriented level; System-oriented level; widely used standard; Widely used standard", sponsororg = "Mississippi State Univ.; NSF", thesaurus = "Message passing; Parallel programming; Programming environments; Software standards; Software tools", treatment = "P Practical", } @InProceedings{Ferrari:1995:TDC, author = "A. J. Ferrari and V. S. Sunderam", title = "{TPVM}: distributed concurrent computing with lightweight processes", crossref = "IEEE:1995:PFI", pages = "211--218", year = "1995", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. 
Sci., Virginia Univ., Charlottesville, VA, USA",
  classification = "C5440 (Multiprocessing systems); C6150N (Distributed systems software)",
  keywords = "Data dependencies; Data-driven scheduling model; Distributed concurrent computing; Experimental auxiliary subsystem; Explicit message passing model; Library interface; Lightweight processes; Load balance; Parallelism; Processor utilization; Scheduling; SPMD-style algorithms; Threads-oriented PVM; TPVM",
  thesaurus = "Message passing; Parallel processing; Scheduling",
}

@Article{Fineberg:1995:IMM,
  author = "Samuel A. Fineberg",
  title = "Implementing multidisciplinary and multi-zonal applications using {MPI}",
  journal = j-FRONTIERS-MASS-PAR-COMP-CONF-PROC,
  pages = "496--503",
  month = "????",
  year = "1995",
  bibdate = "Fri May 24 09:57:40 MDT 1996",
  bibsource = "Compendex database; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "IEEE catalog number 95TH8024.",
  abstract = "Multidisciplinary and multi-zonal applications are codes where two or more distinct parallel programs or copies of a single program are utilized to model a single problem. To support such applications, a program can be divided into several single program multiple data stream (SPMD) applications, each of which solves the equations for a single physical discipline or grid zone. These applications are bound together to form a single multidisciplinary or multi-zonal program in which the constituent parts communicate via point-to-point message passing routines. In this report it is shown that the new Message Passing Interface (MPI) standard is a viable portable library for implementing the message passing portion of multidisciplinary applications. Further, with the extension of a portable loader, fully portable multidisciplinary application programs can be developed. Finally, the performance of MPI is compared to that of some native message passing libraries.
This comparison shows that MPI can be implemented to deliver performance commensurate with native message passing libraries.",
  acknowledgement = ack-nhfb,
  affiliation = "NASA Ames Research Cent",
  affiliationaddress = "Moffett Field, CA, USA",
  classification = "722.2; 722.3; 722.4; 723.1; 723.2; 921.6",
  conference = "Proceedings of the 5th Symposium on the Frontiers of Massively Parallel Computation",
  fjournal = "Frontiers of Massively Parallel Computation --- Conference Proceedings",
  journalabr = "Front Massively Parallel Comput Conf Proc",
  keywords = "Codes (symbols); Computational methods; Computer software; Computer software portability; Data communication systems; Data handling; Interfaces (computer); Mathematical models; Message passing; Multidisciplinary program; Multiprogramming; Multizonal program; Parallel processing systems; Resource allocation; Single program multiple data stream; Storage allocation (computer); Supervisory and executive programs",
  meetingaddress = "McLean, VA, USA",
  meetingdate = "Feb 6--9 1995",
  meetingdate2 = "02/06--09/95",
  sponsor = "IEEE Computer Society",
}

@InProceedings{Ford:1995:NNN,
  author = "Brian Ford",
  title = "The New {NAG} Numerical {PVM} Library (or {A} New Parallel Numerical Library Based on {PVM})",
  crossref = "IFIP:1995:KWC",
  pages = "??--??",
  year = "1995",
  bibdate = "Wed Jan 24 07:11:31 2001",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://www.nsc.liu.se/~boein/ifip/kyoto/workshop-info/proceedings/ford/ford1.html",
  acknowledgement = ack-nhfb,
}

@InProceedings{Franke:1995:AAV,
  author = "E. A. Franke and S. D. Huffman and W. M. Carter and J. P. Baumgartner and D. J.
Wenzel",
  title = "{AVTP} --- an architecture for visualization using remote parallel\slash distributed computing",
  crossref = "Grinstein:1995:VDE",
  journal = j-PROC-SPIE,
  volume = "2410",
  pages = "230--237",
  year = "1995",
  CODEN = "PSISDG",
  ISSN = "0277-786X (print), 1996-756X (electronic)",
  ISSN-L = "0277-786X",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Southwest Res. Inst., San Antonio, TX, USA",
  classification = "C6130B (Graphics techniques); C6150N (Distributed systems software)",
  fjournal = "Proceedings of the SPIE --- The International Society for Optical Engineering",
  keywords = "Advanced Visualization Technology Project; AVTP; Data cache server; High speed data networks; Image generation library; Image specification toolset; Message passing; Parallel processor machines; PVM; Remote computer resources; Remote distributed computing; Remote parallel computing; Research and development; Scalable computing; Shared memory; Streamlines; Surfaces; System architectures; Vector fields; Visualization architecture; Visualization tools",
  thesaurus = "Cache storage; Data visualisation; File servers; Message passing; Multiprocessing programs",
}

@InProceedings{Franke:1995:MIS,
  author = "H. Franke and P. Hochschild and P. Pattnaik and J.-P. Prost and M. Snir",
  title = "{MPI} on {IBM SP1\slash SP2}: current status and future directions",
  crossref = "IEEE:1995:PSP",
  pages = "39--48",
  year = "1995",
  bibdate = "Sat Apr 19 16:34:54 MDT 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "IBM Thomas J. Watson Res. Center, Yorktown Heights, NY, USA",
  classification = "C5440 (Multiprocessing systems); C6110B (Software engineering techniques); C6110P (Parallel programming); C6150N (Distributed systems software)",
  conftitle = "Proceedings Scalable Parallel Libraries Conference",
  corpsource = "IBM Thomas J. Watson Res.
Center, Yorktown Heights, NY, USA",
  keywords = "distributed memory systems; future directions; Future directions; IBM computers; IBM Scalable Power PARALLEL 1; IBM Scalable Power PARALLEL 2; IBM SP1/SP2; initial performance measurements; Initial performance measurements; message passing; MPI; native EUI library; Native EUI library; parallel programming; prototype implementation; Prototype implementation; software libraries; software standards",
  sponsororg = "Mississippi State Univ.; NSF",
  thesaurus = "Distributed memory systems; IBM computers; Message passing; Parallel programming; Software libraries; Software standards",
  treatment = "P Practical",
}

@TechReport{Franke:1995:MPEa,
  author = "Hubertus Franke",
  title = "{MPI} programming environment for {IBM SP1\slash SP2}",
  type = "Research report",
  number = "RC 19991 (88480)",
  institution = inst-IBM-WATSON,
  address = inst-IBM-WATSON:adr,
  pages = "9",
  year = "1995",
  bibdate = "Mon Jan 15 15:32:53 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract = "In this paper we discuss an implementation of the Message Passing Interface standard (MPI) for the IBM Scalable Power PARALLEL 1 and 2 (SP1, SP2). Key to a reliable and efficient implementation of a message passing library on these machines is the careful design of a UNIX-Socket like layer in the user space with controlled access to the communication adapters and with adequate recovery and flow control. The performance of this implementation is at the same level as the IBM-proprietary message passing library (MPL).
We also show that in the IBM SP1 and SP2 we achieve integrated tracing ability, where both system events, such as context switches and page fault etc., and MPI related activities are traced, with minimal overhead to the application program, thus presenting application programmers the trace of all the events that ultimately affect efficiency of a parallel program.",
  acknowledgement = ack-nhfb,
  keywords = "Parallel programming (Computer science)",
}

@InProceedings{Franke:1995:MPEb,
  author = "Hubertus Franke and C. Eric Wu and Michel Riviere and Pratap Pattnaik and Marc Snir",
  title = "{MPI} Programming Environment for {IBM SP1\slash SP2}",
  crossref = "IEEE:1995:PIC",
  pages = "127--135",
  year = "1995",
  bibdate = "Sat Apr 19 16:34:54 MDT 1997",
  bibsource = "Compendex database; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "IEEE catalog number 95CH35784.",
  abstract = "In this paper we discuss an implementation of the Message Passing Interface standard (MPI) for the IBM Scalable Power PARALLEL 1 and 2 (SP1, SP2). Key to a reliable and efficient implementation of a message passing library on these machines is the careful design of a UNIX-Socket like layer in the user space with controlled access to the communication adapters and with adequate recovery and flow control. The performance of this implementation is at the same level as the IBM-proprietary message passing library (MPL). We also show that in the IBM SP1 and SP2 we achieve integrated tracing ability, where both system events, such as context switches and page fault etc., and MPI related activities are traced, with minimal overhead to the application program, thus presenting application programmers the trace of all the events that ultimately affect efficiency of a parallel program.",
  acknowledgement = ack-nhfb,
  affiliation = "IBM T. J. Watson Research Cent",
  affiliationaddress = "Yorktown Heights, NY, USA",
  classification = "722.2; 722.4; 723; 723.1; 723.1.1; C5440 (Multiprocessing systems); C6110P (Parallel programming); C6115 (Programming support); C6150N (Distributed systems software)",
  conference = "Proceedings of the 15th International Conference on Distributed Computing Systems",
  conftitle = "Proceedings of 15th International Conference on Distributed Computing Systems",
  corpsource = "IBM Thomas J. Watson Res. Center, Yorktown Heights, NY, USA",
  journalabr = "Proc Int Conf Distrib Comput Syst",
  keywords = "adequate recovery; Adequate recovery; application program interfaces; application programmers; Application programmers; Application programming interfaces; communication adapters; Communication adapters; Computer architecture; Computer programming; Computer software; Computer system recovery; Fault tolerant computer systems; flow control; Flow control; IBM Scalable Power PARALLEL 1; IBM SP1/SP2; integrated tracing ability; Integrated tracing ability; Interfaces (computer); message passing; message passing interface standard; Message passing interface standard; Message passing library; MPI programming environment; page fault; Page fault; Parallel processing systems; parallel program; Parallel program; parallel programming; Performance; Power parallel system; programming environments; Software engineering; Systems analysis; UNIX; UNIX-Socket like layer",
  meetingaddress = "Vancouver, Can",
  meetingdate = "May 30--Jun 2 1995",
  meetingdate2 = "05/30--06/02/95",
  sponsor = "IEEE Computer Society",
  sponsororg = "IEEE Comput. Soc. Tech. Committee on Distributed Process",
  thesaurus = "Application program interfaces; Message passing; Parallel programming; Programming environments",
  treatment = "A Application; P Practical",
}

@InProceedings{Ge:1995:DHA,
  author = "Yuzhen Ge and L. T. Watson and E. G.
{Collins, Jr.}",
  title = "Distributed homotopy algorithms for {$ H^2 / H^\infty $} controller synthesis",
  crossref = "Bailey:1995:PSS",
  pages = "84--89",
  year = "1995",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Dept. of Math. and Comput. Sci., Butler Univ., Indianapolis, IN, USA",
  classification = "C1310 (Control system analysis and synthesis methods); C3220 (Controllers); C4240P (Parallel programming and algorithm theory)",
  keywords = "Distributed homotopy algorithms; H/sup 2//H/sup infinity / controller synthesis; High performance computation; Industrial design environment; Jacobian matrix computation; Mixed-norm controller synthesis problem; Parallel Virtual Machine; UNIX workstations",
  thesaurus = "Control system synthesis; Controllers; Distributed algorithms",
}

@InProceedings{Gentzsch:1995:STP,
  author = "W. Gentzsch and U. Block and F. Ferstl",
  title = "Software tools for parallel computers and workstation clusters",
  crossref = "Ferenczi:1995:PAH",
  pages = "23--42",
  year = "1995",
  bibdate = "Sun Dec 22 10:20:45 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "GENIAS Software GmbH, Neutraubling, Germany",
  classification = "C5220P (Parallel architecture); C5430 (Microcomputers); C5440 (Multiprocessing systems); C5540 (Terminals and graphic displays); C6115 (Programming support)",
  keywords = "Benchmark results; EXPRESS; FORGE 90; GENIAS; Intel iPSC/860; NCUBE/2; Parallel codes; Parallel computers; Parsytec Multicluster; PVM/MPI; Software tools; Workstation clusters; XHPF",
  thesaurus = "Parallel processing; Software tools; Workstations",
}

@InProceedings{Gianuzzi:1995:UPI,
  author = "V. Gianuzzi and F.
Merani",
  title = "Using {PVM} to Implement a Distributed Dependable Simulation System",
  crossref = "IEEE:1995:PEW",
  pages = "529--535",
  year = "1995",
  bibdate = "Wed Apr 16 06:39:19 MDT 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Dipartimento di Inf. e Sci. dell'Inf., Genoa Univ., Italy",
  classification = "C4240P (Parallel programming and algorithm theory); C5470 (Performance evaluation and testing); C6150N (Distributed systems software)",
  corpsource = "Dipartimento di Inf. e Sci. dell'Inf., Genoa Univ., Italy",
  keywords = "algorithms; checkpoint-restart mechanism; Checkpoint-restart mechanism; distributed; distributed algorithms; Distributed algorithms; distributed dependable simulation system; Distributed dependable simulation system; fault tolerant; fault tolerant computing; Fault tolerant mechanisms; high speed; High speed interconnection; interconnection; mechanisms; message; passing; PVM routines; simulations modelling; Simulations modelling; synchronisation; Virtual Time",
  sponsororg = "Euromicro; Assoc. Italiana per Inf. Calcolo Autom",
  thesaurus = "Distributed algorithms; Fault tolerant computing; Message passing; Synchronisation",
  treatment = "P Practical",
}

@InProceedings{Gillich:1995:FPP,
  author = "S. Gillich and B. Ries",
  title = "Flexible, portable performance analysis for {PARMACS} and {MPI}",
  crossref = "Hertzberger:1995:HPM",
  pages = "937--??",
  year = "1995",
  bibdate = "Thu Feb 29 17:59:11 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}

@InProceedings{Greenfield:1995:OPS,
  author = "J.
Greenfield",
  title = "An Overview of the {PVM} Software System",
  crossref = "IEEE:1995:ISE",
  pages = "17--23",
  year = "1995",
  bibdate = "Wed Apr 16 06:39:19 MDT 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C5220P (Parallel architecture); C5440 (Multiprocessing systems); C6150N (Distributed systems software)",
  corpsource = "Dept. of Electr. and Comput. Eng., New Mexico Univ., Albuquerque, NM, USA",
  keywords = "analysis; debugging; machine; message passing; parallel processing; Parallel Virtual Machine; performance; PVM; software system; virtual; virtual machines; visualization tools",
  treatment = "P Practical",
}

@InProceedings{Gropp:1995:DPM,
  author = "W. Gropp and E. Lusk",
  title = "Dynamic process management in an {MPI} setting",
  crossref = "IEEE:1995:PSI",
  pages = "530--533",
  year = "1995",
  CODEN = "PSPDF8",
  ISSN = "1063-6374",
  bibdate = "Sat Apr 19 16:34:54 MDT 1997",
  bibsource = "Compendex database; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "IEEE catalog number 95TB8131.",
  abstract = "We describe an architecture for the runtime environment for parallel applications as prelude to describing how parallel applications might interface to their environment in a portable way. We propose extensions to the Message-Passing Interface (MPI) Standard that provide for dynamic process management, including spawning of new processes by a running application and connection to existing processes to support client\slash server applications. Such extensions are needed if more of the runtime environment for parallel programs is to be accessible to MPI programs or to be themselves written using MPI. The extensions proposed here are motivated by real applications and fit cleanly with existing concepts of MPI. No changes to the existing MPI Standard are proposed, thus all present MPI programs will run unchanged.",
  acknowledgement = ack-nhfb,
  affiliation = "Div. of Math. and Comput. Sci., Argonne Nat. Lab.",
  affiliationaddress = "Argonne, IL, USA",
  classification = "722.2; 722.3; 722.4; 723.1; 902.2; C5220P (Parallel architecture); C5440 (Multiprocessing systems); C6150N (Distributed systems software)",
  conference = "Proceedings of the 1995 7th IEEE Symposium on Parallel and Distributed Processing",
  conftitle = "Proceedings of Seventh IEEE Symposium on Parallel and Distributed Processing",
  corpsource = "Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL, USA",
  journalabr = "IEEE Symp Parallel Distrib Process Proc",
  keywords = "Client/server applications; Computer architecture; Computer networks; Computer software; Computer systems programming; Computer workstations; Data communication systems; dynamic process management; Dynamic process management; Interfaces (computer); message passing; Message-passing interface; MPI setting; parallel applications; Parallel applications; parallel processing; Parallel processing systems; Parallel programs; Process control; process management; Process management; Real time systems; Resource allocation; runtime environment; Runtime environment; Runtime environments; Scheduling; Standards",
  meetingaddress = "San Antonio, TX, USA",
  meetingdate = "Oct 25--28 1995",
  meetingdate2 = "10/25--28/95",
  sponsor = "IEEE",
  sponsororg = "IEEE Comput. Soc. Tech. Committee on Comput. Architecture; IEEE Comput. Soc. Tech. Committee on Distributed Process.; IEEE Comput. Soc. Dallas Chapter",
  thesaurus = "Message passing; Parallel processing",
  treatment = "P Practical",
}

@Article{Gropp:1995:EIS,
  author = "W. D. Gropp and E.
Lusk",
  title = "Experiences with the {IBM SP1}",
  journal = j-IBM-SYS-J,
  volume = "34",
  number = "2",
  pages = "249--262",
  year = "1995",
  CODEN = "IBMSA7",
  ISSN = "0018-8670",
  bibdate = "Tue Mar 19 17:38:46 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://www.research.ibm.com/journal/sj34-2.html#seven",
  abstract = "One of the first IBM parallel processing computers---the SP1*---and the largest, with 128 nodes, was installed in 1993 at Argonne National Laboratory. It took only days, not months, to prepare for and migrate applications to this parallel supercomputer, demonstrating that high performance, parallelism, and portability can coexist. This paper describes the early experiences with the SP1 at Argonne, which provide lessons for supercomputer system designers and users alike. We explore what features of software technology and system architecture enabled immediate and successful use of the SP1. The paper concludes with a brief indication of why the move to the SP2* software environment using the SP2 communication adapters, the use of the emerging Message-Passing Interface standard, and the continued use of the SP1 processors have been successful.",
  acknowledgement = ack-nhfb,
  affiliation = "Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL, USA",
  classification = "C5220P (Parallel architecture); C5440 (Multiprocessing systems); C6110P (Parallel programming); C6150N (Distributed systems software)",
  fjournal = "IBM Systems Journal",
  keywords = "High performance; IBM parallel processing computers; IBM SP1; Message passing interface standard; Parallel supercomputer; Parallelism; Portability; Software technology; SP1 processors; SP2 communication adapters; SP2 software environment; Supercomputer system designers; System architecture",
  language = "English",
  pubcountry = "USA",
  thesaurus = "IBM computers; Message passing; Parallel architectures; Parallel machines; Parallel programming",
}

@InProceedings{Gropp:1995:IMM,
  author = "W.
Gropp and E. Lusk",
  title = "Implementing {MPI}: the 1994 {MPI Implementors' Workshop}",
  crossref = "IEEE:1995:PSP",
  pages = "55--59",
  year = "1995",
  bibdate = "Sat Apr 19 16:34:54 MDT 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL, USA",
  classification = "C6110B (Software engineering techniques); C6110P (Parallel programming); C6150N (Distributed systems software)",
  conftitle = "Proceedings Scalable Parallel Libraries Conference",
  corpsource = "Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL, USA",
  keywords = "implementation process; Implementation process; message passing; MPI implementation effort; parallel computing; Parallel computing; parallel library; Parallel library; parallel programming; software libraries; software standards; standard message-passing library interface; Standard message-passing library interface; subroutines",
  sponsororg = "Mississippi State Univ.; NSF",
  thesaurus = "Message passing; Parallel programming; Software libraries; Software standards; Subroutines",
  treatment = "P Practical",
}

@InProceedings{Gropp:1995:MGX,
  author = "W. Gropp and E. Karrels and E. Lusk",
  title = "{MPE} graphics --- scalable {X11} graphics in {MPI}",
  crossref = "IEEE:1995:PSP",
  pages = "49--54",
  year = "1995",
  bibdate = "Sat Apr 19 16:34:54 MDT 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL, USA",
  classification = "C6110P (Parallel programming); C6130B (Graphics techniques); C6150N (Distributed systems software)",
  conftitle = "Proceedings Scalable Parallel Libraries Conference",
  corpsource = "Div. of Math. and Comput. Sci., Argonne Nat.
Lab., IL, USA",
  keywords = "communication patterns; Communication patterns; communication traffic; Communication traffic; computer graphics; library based message passing; Library based message passing; message passing; MPE graphics; MPI; MPI implementation; MPI message passing standard; MPI Standard; parallel graphics library; Parallel graphics library; parallel graphics operations; Parallel graphics operations; parallel graphics routines; Parallel graphics routines; parallel programming; parallel programs; Parallel programs; parallel semantics; Parallel semantics; programming libraries; Programming libraries; scalable X11 graphics; Scalable X11 graphics; semantics; Semantics; software standards; subroutines; user control; User control; X-based parallel graphics library",
  sponsororg = "Mississippi State Univ.; NSF",
  thesaurus = "Computer graphics; Message passing; Parallel programming; Software standards; Subroutines",
  treatment = "P Practical",
}

@InProceedings{Gropp:1995:MMI,
  author = "W. Gropp and E. Lusk",
  title = "The {MPI} Message-Passing Interface Standard: Overview and Status",
  crossref = "Dongarra:1995:HPC",
  pages = "265--270",
  year = "1995",
  bibdate = "Thu Feb 29 17:59:11 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}

@InProceedings{Guan:1995:SCC,
  author = "Xiaojun Guan and Richard J. Mural and Edward C. Uberbacher",
  title = "Sequence comparison on a cluster of workstations using the {PVM} system",
  crossref = "IEEE:1995:PIP",
  pages = "190--195",
  year = "1995",
  CODEN = "PSPDF8",
  DOI = "https://doi.org/10.1109/IPPS.1995.395931",
  ISSN = "1063-6374",
  bibdate = "Wed Apr 16 06:39:19 MDT 1997",
  bibsource = "Compendex database; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract = "Sequence comparison is one of the most important tools in molecular biology research. As the amount of DNA data increases rapidly, efficient sequence comparison algorithms are essential in studying newly discovered sequences.
We have implemented a distributed sequence comparison algorithm by Smith and Waterman on a cluster of workstations using the PVM paradigm. This implementation has achieved similar performance to the Intel iPSC\slash 860 Hypercube, a massively parallel computer. The distributed Smith-Waterman algorithm serves as a search tool for two Internet servers GRAIL and GENQUEST. This paper describes the implementation and the performance of the algorithm.",
  acknowledgement = ack-nhfb,
  affiliation = "Oak Ridge Natl Lab",
  affiliationaddress = "Oak Ridge, TN, USA",
  classification = "461.2; 721.1; 722.4; 723.1; C5220P (Parallel architecture); C5440 (Multiprocessing systems); C5620W (Other computer networks); C7330 (Biology and medical computing)",
  conference = "Proceedings of the IEEE 9th International Parallel Processing Symposium",
  corpsource = "Div. of Comput. Sci. and Math., Oak Ridge Nat. Lab., TN, USA",
  journalabr = "IEEE Symp Parallel Distrib Process Proc",
  keywords = "Algorithms; cluster of workstations; Cluster of workstations; Computational complexity; Computer software; Computer workstations; DNA; DNA data; DNA sequences; GENQUEST; GRAIL; hypercube; hypercube networks; Intel iPSC/860; Intel iPSC/860 hypercube; Internet; Internet servers; Internet servers GRAIL; massively parallel computer; Massively parallel computer; medical computing; molecular biology research; Molecular biology research; molecular biophysics; Parallel processing systems; Parallel virtual machine; performance; Performance; PVM system; sequence comparison; Sequence comparison; Smith Waterman algorithm",
  meetingaddress = "Santa Barbara, CA, USA",
  meetingdate = "Apr 25--28 1995",
  meetingdate2 = "04/25--28/95",
  sponsor = "IEEE",
  sponsororg = "IEEE Comput. Soc. Tech. Committee on Parallel Process",
  thesaurus = "DNA; Hypercube networks; Internet; Medical computing; Molecular biophysics",
  treatment = "A Application; P Practical",
}

@InProceedings{Guarracino:1995:PMB,
  author = "M. R. Guarracino and F.
Perla",
  title = "A parallel modified block {Lanczos} algorithm for distributed memory architectures",
  crossref = "IEEE:1995:PEW",
  pages = "424--431",
  year = "1995",
  bibdate = "Sun Dec 22 10:20:45 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Dipartimento di Matematica e Applicazioni, Naples Univ., Italy",
  classification = "C4140 (Linear algebra); C4240P (Parallel programming and algorithm theory); C6110P (Parallel programming)",
  keywords = "Block column wrap-around matrices; Block Lanczos algorithm; Distributed memory architectures; Eigenproblems; Load-balancing; Parallel block Lanczos algorithm; Parallel software",
  thesaurus = "Distributed memory systems; Eigenvalues and eigenfunctions; Matrix algebra; Parallel algorithms",
}

@InProceedings{Hardwick:1995:PVL,
  author = "J. C. Hardwick",
  title = "Porting a vector library: a comparison of {MPI}, {Paris}, {CMMD} and {PVM}",
  crossref = "IEEE:1995:PSP",
  pages = "68--77",
  year = "1995",
  bibdate = "Sat Apr 19 16:34:54 MDT 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Sch. of Comput. Sci., Carnegie Mellon Univ., Pittsburgh, PA, USA",
  classification = "C6110B (Software engineering techniques); C6110P (Parallel programming); C6115 (Programming support); C6140D (High level languages)",
  conftitle = "Proceedings Scalable Parallel Libraries Conference",
  corpsource = "Sch. of Comput.
Sci., Carnegie Mellon Univ., Pittsburgh, PA, USA",
  keywords = "CM-2; CM-5; CMMD; compiler target; Compiler target; Cray C90; debugging; Debugging; message passing; MPI; NESL; nested data-parallel languages; Nested data-parallel languages; parallel; parallel languages; parallel programming; parallel vector library CVL; Parallel vector library CVL; Paris; portable MPI implementation; Portable MPI implementation; Proteus; PVM; RISC based MPP architectures; software libraries; subroutines; vector library CVL; vector library porting; Vector library porting; vector processor systems",
  sponsororg = "Mississippi State Univ.; NSF",
  thesaurus = "Message passing; Parallel languages; Parallel programming; Software libraries; Subroutines; Vector processor systems",
  treatment = "P Practical",
}

@InProceedings{Hariri:1995:STE,
  author = "S. Hariri and Sung-Yong Park and R. Reddy and M. Subramanyan and R. Yadav and G. C. Fox and M. Parashar",
  title = "Software tool evaluation methodology",
  crossref = "IEEE:1995:PIC",
  pages = "3--10",
  year = "1995",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Northeast Parallel Archit. Center, Syracuse Univ., NY, USA",
  classification = "C6110P (Parallel programming); C6115 (Programming support); C6150N (Distributed systems software)",
  keywords = "Alpha cluster; ATM; Distributed computing software; Distributed systems platforms; Ethernet; Express; FDDI; IBM-SP1; Message passing tools; Multi-level evaluation methodology; P4; Parallel computing software; Programming paradigms; PVM; Software tool evaluation methodology; SUN workstations",
  thesaurus = "Message passing; Parallel programming; Software performance evaluation; Software tools",
}

@InProceedings{Hausner:1995:EIP,
  author = "M. Hausner and M. Burrows and C. A.
Thekkath",
  title = "Efficient implementation of {PVM} on the {AN2 ATM} network",
  crossref = "Hertzberger:1995:HPM",
  pages = "562--569",
  year = "1995",
  bibdate = "Wed Apr 16 06:39:19 MDT 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Inst. fur Computersyst., Eidgenossische Tech. Hochschule, Zurich, Switzerland",
  classification = "B6150C (Communication switching); B6210L (Computer communications); C5620L (Local area networks); C6115 (Programming support); C6150N (Distributed systems software)",
  corpsource = "Inst. fur Computersyst., Eidgenossische Tech. Hochschule, Zurich, Switzerland",
  keywords = "Alpha workstations; AN2 ATM network; asynchronous transfer mode; ATM link bandwidth; coarse-grained; Coarse-grained multicomputer; end-to-end PVM communication performance; End-to-end PVM communication performance; environments; high-speed ATM network; High-speed ATM network; high-speed network; High-speed network; local area networks; multicomputer; programming; PVM environment; workstation cluster; Workstation cluster; workstations",
  pubcountry = "Germany",
  thesaurus = "Asynchronous transfer mode; Local area networks; Programming environments; Workstations",
  treatment = "P Practical",
}

@InProceedings{Hoekstra:1995:CPP,
  author = "A. G. Hoekstra and F. {Van der Linden} and P. M. A. Sloot and L. O. Hertzberger",
  title = "Comparing the {Parix} and {PVM} parallel programming environments",
  crossref = "Fritzson:1995:PPA",
  pages = "288--292",
  year = "1995",
  bibdate = "Wed Apr 16 06:39:19 MDT 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C5220P (Parallel architecture); C5440 (Multiprocessing systems); C6110B (Software engineering techniques); C6110P (Parallel programming); C6115 (Programming support); C6150N (Distributed systems software)",
  corpsource = "Parallel Sci. Comput.
and Simulation Group, Amsterdam Univ., Netherlands", keywords = "communication capabilities; complexity analysis; computational complexity; development; environments; floating; floating point arithmetic; functionality; generic; global communication times; native parallel programming environments; parallel architectures; parallel programming; parallel programming environments; Parix parallel; Parsytec GCel; Parsytec PowerXplorer; performance; performance penalties; point communication times; point performance; point to; portability; portable parallel program; PowerPC chip; programmability; programming; programming environments; PVM parallel programming environments; software; software performance evaluation; software tools; support; time; tool; transputer systems", pubcountry = "Netherlands", treatment = "P Practical", xxauthor = "A. G. Hoekstra and P. M. A. Sloot and L. O. Hertzberger", xxcrossref = "VanKatwijk:1995:AAC", } @Article{Hollerbach:1995:FDA, author = "Rainer Hollerbach", title = "Fast dynamo action in spherical geometry: Numerical calculations using parallel virtual machines", journal = j-COMPUT-PHYS, volume = "9", number = "4", pages = "460--??", month = jul, year = "1995", CODEN = "CPHYE2", DOI = "https://doi.org/10.1063/1.168547", ISSN = "0894-1866 (print), 1558-4208 (electronic)", ISSN-L = "0894-1866", bibdate = "Wed Apr 10 08:45:55 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/computphys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", URL = "https://aip.scitation.org/doi/10.1063/1.168547", acknowledgement = ack-nhfb, ajournal = "Comput. Phys", fjournal = "Computers in Physics", journal-URL = "https://aip.scitation.org/journal/cip", } @InProceedings{Hondroudakis:1995:PEV, author = "A. Hondroudakis and R. Procter and K. 
Shanmugam", title = "Performance evaluation and visualization with {VISPAT}", crossref = "Malyshkin:1995:PCT", pages = "180--185", year = "1995", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. Sci., Edinburgh Univ., UK", classification = "C6110P (Parallel programming); C6110V (Visual programming); C6115 (Programming support)", keywords = "Graphical front end; Message passing; MPI; Parallel programs; Performance analysis; Program execution; VISPAT; Visualization", thesaurus = "Data visualisation; Parallel programming; Software performance evaluation; Software tools; Visual programming", } @Article{Hong:1995:PNP, author = "Lin Hong and Chen Huaping", title = "{PVM} and network parallel computing", journal = j-MINI-MICRO-SYSTEMS, volume = "16", number = "2", pages = "53--58", month = feb, year = "1995", CODEN = "XWJXEH", ISSN = "1000-1220", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. Sci. and Technol., Univ. of Sci. and Technol. of China, Hefei, China", classification = "C6150N (Distributed systems software)", corpsource = "Dept. of Comput. Sci. and Technol., Univ. of Sci. and Technol. 
of China, Hefei, China", fjournal = "Mini-Micro Systems", keywords = "computing model; Computing model; load balancing; Load balancing; message passing; Message passing; network parallel computing; Network parallel computing; parallel granularity; Parallel granularity; parallel processing; programming methodology; Programming methodology; PVM; resource allocation; software environment; Software environment; virtual machines", language = "Chinese", pubcountry = "China", thesaurus = "Message passing; Parallel processing; Resource allocation; Virtual machines", treatment = "P Practical", } @InProceedings{Hui:1995:SPS, author = "Chi-Chung Hui and Mounir Hamdi and Ishfaq Ahmad", title = "Software platform for solving {PDEs} on distributed systems: Implementation issues and performance prediction", crossref = "IEEE:1995:PNA", pages = "383--388", year = "1995", CODEN = "PSICD2", ISSN = "0730-6512", bibdate = "Fri May 24 09:58:00 MDT 1996", bibsource = "Compendex database; https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "IEEE catalog number 95CB35838.", abstract = "This paper describes the implementation and performance of a parallel platform for solving partial differential equations (PDEs) on distributed systems. The platform has been implemented using PVM for a network of workstations. It allows the inclusion of a wide range of parameters and programming aids. The PDEs are specified in the form of finite difference equations. With a given set of parameters and a partitioning strategy, the platform provides facilities to record and predict the performance of an application before running it. The performance prediction model helps the user to identify the major bottlenecks of the platform such that by reducing them, the speedup can be improved. 
We also present analysis of various factors that can have drastic effect on the speedup, which allows the user to tune a number of parameters to maximize the performance.", acknowledgement = ack-nhfb, affiliation = "Hong Kong Univ of Science and Technology", affiliationaddress = "Kowloon, Hong Kong", classification = "722.2; 722.3; 722.4; 723.1; 921.2; 921.6; C4170 (Differential equations); C5620L (Local area networks); C6150N (Distributed systems software)", conference = "Proceedings of the 19th Annual International Computer Software and Applications Conference COMPSAC '95", journalabr = "Proc IEEE Comput Soc Int Comput Software Appl Conf", keywords = "Application; Asynchronous communication library routines; Bottlenecks; Computer software; Computer workstations; Data communication systems; Distributed systems; Finite difference equations; Finite difference method; Mathematical models; Parallel platform; Parallel virtual machine (PVM) system; Partial differential equation solving; Partial differential equations; Partitioning strategy; Performance prediction; Performance recording; Programming aids; PVM; Software platform; Speedup; Parallel processing systems; Workstation network", meetingaddress = "Dallas, TX, USA", meetingdate = "Aug 9--11 1995", meetingdate2 = "08/09--11/95", sponsor = "IEEE", thesaurus = "Finite difference methods; Local area networks; Parallel processing; Partial differential equations; Software performance evaluation; Workstations", } @MastersThesis{Humphres:1995:LBE, author = "Christopher Wade Humphres", title = "A load balancing extension for the {PVM} software system", type = "M.E.E. thesis", school = inst-UAL-EE, address = inst-UAL-EE:adr, pages = "viii + 98", year = "1995", bibdate = "Mon Jan 15 16:50:57 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, keywords = "Computer networks; Parallel computers", } @Article{Hungenahally:1995:PIQ, author = "A. Hungenahally and A.
Suresh", title = "{PVM} implementation of quadtree building algorithms on {SIMD} hypercube system", journal = j-IEEE-INT-CONF-ALG-ARCH-PAR-PROC, volume = "2", pages = "855--858", month = "????", year = "1995", bibdate = "Fri May 24 09:58:00 MDT 1996", bibsource = "Compendex database; https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "IEEE catalog number 95TH0682-5.", abstract = "Representation of Data using hierarchical data structures is commonly used in applications such as Computer graphics, Digital image processing, Computer Vision and techniques are being evolved for efficient representation of these data. Transforming bilevel images to linear quadtrees is a way of representing the high-volume data. In this paper, the preliminary investigation and results thus obtained for transforming binary images to linear quadtrees using Parallel Virtual Machine System Software are presented. Single Instruction Multiple Data hypercube algorithms implemented using PVM software was tested under DOS operating system on IBM compatible PCs. The quadtree algorithm generates locational codes in pre-order and generally runs in O(log n) time and this paper tested the feasibility of achieving this time for an SIMD machine.", acknowledgement = ack-nhfb, affiliation = "Griffith Univ", affiliationaddress = "Brisbane, Aust", classification = "722.4; 723; 723.2", conference = "Proceedings of the IEEE 1st International Conference on Algorithms and Architectures for Parallel Processing. 
Part 2 (of 2)", fjournal = "IEEE International Conference on Algorithms and Architectures for Parallel Processing", journalabr = "IEEE Int Conf Algorithms Archit Parall Process", keywords = "Codes (symbols); Computer software; Data structures; DOS; Hierarchical data structures; Hypercube; Image processing; Parallel algorithms; Parallel processing systems; Parallel virtual machine; Personal computers; Quadtree; Single instruction multiple data", meetingaddress = "Brisbane, Aust", meetingdate = "Apr 19--21 1995", meetingdate2 = "04/19--21/95", sponsor = "IEEE", } @Article{Ingle:1995:MAS, author = "N. K. Ingle and T. J. Mountziaris", title = "A multifrontal algorithm for the solution of large systems of equations using network-based parallel computing", journal = j-COMP-CHEM-ENG, volume = "19", number = "6--7", pages = "671--681", month = jun # "--" # jul, year = "1995", CODEN = "CCENDW", ISSN = "0098-1354", ISSN-L = "0098-1354", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Chem. Eng., State Univ.
of New York, Buffalo, NY, USA", classification = "C4140 (Linear algebra); C4160 (Numerical integration and differentiation); C4170 (Differential equations); C4185 (Finite element analysis); C4240P (Parallel programming and algorithm theory); C7320 (Physics and chemistry computing)", fjournal = "Computers \& Chemical Engineering", keywords = "Chemical vapor deposition; Distributed computing environment; Finite element analysis; Flow; Granularity; Heat transfer problem; In-core computations; Intrinsic fault tolerance capabilities; Large sparse equation systems; Multifrontal algorithm; Network-based parallel computing; Networked workstations; Out-of-core computations; Parallel Virtual Machine software; Performance; Processors; Reaction processes; Speedups; Thin films; Transport processes", pubcountry = "UK", thesaurus = "Chemical reactions; Chemical vapour deposition; Chemically reactive flow; Chemistry computing; Differential equations; Finite element analysis; Heat transfer; Integration; Parallel algorithms; Physics computing; Software fault tolerance; Sparse matrices; Thin films; Transport processes; Workstations", } @TechReport{Jann:1995:AMP, author = "Joefon Jann and Hubertus Franke", title = "Analysis of an {MPI} program using {UTE} on the {IBM SP2}", type = "Research report", number = "RC 20085 (88832)", institution = inst-IBM-WATSON, address = inst-IBM-WATSON:adr, pages = "11", year = "1995", bibdate = "Mon Jan 15 15:32:53 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "We describe an implementation of a 2D-FFT (Complex-Complex) program in MPI-F on the SP2 and show its actual performance. 
The purpose of this paper is to illustrate how we use the new tracing utility UTE/MPI provided in MPI-F to verify the correctness of our algorithm, to provide timing statistics summaries, and to unravel other system activities, often unexpected by the user, that affect the total elapsed time of the program.", acknowledgement = ack-nhfb, keywords = "Multiprocessors", } @Article{Jeremiassen:1995:RFS, author = "T. E. Jeremiassen and S. J. Eggers", title = "Reducing false sharing on shared memory multiprocessors through compile time data transformations", journal = j-SIGPLAN, volume = "30", number = "8", pages = "179--188", month = aug, year = "1995", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 7 07:51:54 MDT 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "We have developed compiler algorithms that analyze explicitly parallel programs and restructure their shared data to reduce the number of false sharing misses. The algorithms analyze per-process shared data accesses, pinpoint the data structures that are susceptible to false sharing and choose an appropriate transformation to reduce it. The transformations either group data that is accessed by the same processor or separate individual data items that are shared. We evaluate that technique. We show through simulation that our analysis successfully identifies the data structures that are responsible for most false sharing misses, and then transforms them without unduly decreasing spatial locality. The reduction in false sharing positively impacts both execution time and program scalability when executed on a KSR2. Both factors combine to increase the maximum achievable speedup for all programs, more than doubling it for several.
Despite being able to only approximate actual inter-processor memory accesses, the compiler-directed transformations always outperform programmer efforts to eliminate false sharing.", acknowledgement = ack-nhfb, affiliation = "AT and T Bell Labs., Murray Hill, NJ, USA", classification = "C6120 (File organisation); C6150C (Compilers, interpreters and other processors); C6150N (Distributed systems software)", fjournal = "ACM SIG{\-}PLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", keywords = "Compile time data transformations; Compiler algorithms; Data structures; Execution time; False sharing; False sharing misses; Inter-processor memory access; KSR2; Maximum achievable speedup; Parallelizing compilers; Program scalability; Shared data access; Shared memory multiprocessors; Simulation; Spatial locality", thesaurus = "Data structures; Parallel programming; Program compilers; Shared memory systems; Virtual machines", } @Article{Jin:1995:LTP, author = "Lan Jin and Lan Yang", title = "A laboratory for teaching parallel computing on parallel structures", journal = j-SIGCSE, volume = "27", number = "1", pages = "71--75", month = mar, year = "1995", CODEN = "SIGSD3", ISSN = "0097-8418 (print), 2331-3927 (electronic)", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. 
Sci., California State Univ., Fresno, CA, USA", classification = "C0220 (Computing education and training); C5220P (Parallel architecture); C6110P (Parallel programming); C6150N (Distributed systems software); C7430 (Computer engineering)", fjournal = "SIGCSE Bulletin (ACM Special Interest Group on Computer Science Education)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J688", keywords = "Hardware level; Message-passing programming teaching; Multi-computer; Parallel computing teaching laboratory; Parallel processing; Parallel structure; Parallel systems; Parallel Virtual Machine; PVM; Reconfiguration; Software level; Structural implementation", thesaurus = "Computer science education; Laboratories; Message passing; Parallel machines; Parallel programming; Reconfigurable architectures; Teaching", } @InProceedings{Juric:1995:UPV, author = "M. Juric and W. D. Potter and M. Plaksin", title = "Using the {Parallel Virtual Machine} for hunting snake-in-the-box codes", crossref = "Arabnia:1995:TRA", pages = "97--102", year = "1995", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. Sci. and Inf. Syst., DePaul Univ., Chicago, IL, USA", classification = "C1180 (Optimisation techniques); C4230M (Multiprocessor interconnection); C6150E (General utility programs); C6150G (Diagnostic, testing, debugging and evaluating systems); C6150N (Distributed systems software)", corpsource = "Dept. of Comput. Sci. and Inf. 
Syst., DePaul Univ., Chicago, IL, USA", keywords = "adapted code; Adapted code; algorithm; combinatorial explosion; Combinatorial explosion; genetic; Genetic algorithm; genetic algorithms; hypercube networks; maximum length snake; Maximum length snake; multiprocessing; parallel; parallel machines; Parallel single processor machine cluster; Parallel Virtual Machine; programs; PVM software package; single processor machine cluster; snake-in-the-box code hunting; Snake-in-the-box code hunting; system monitoring; systems; transputer; utility programs; virtual machines", pubcountry = "Netherlands", thesaurus = "Genetic algorithms; Hypercube networks; Multiprocessing programs; Parallel machines; System monitoring; Transputer systems; Utility programs; Virtual machines", treatment = "P Practical", } @InProceedings{Kalns:1995:DPD, author = "E. T. Kalns and L. M. Ni", title = "{DaReL}: a portable data redistribution library for distributed-memory machines", crossref = "IEEE:1995:PSP", pages = "78--87", year = "1995", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. 
Sci., Michigan State Univ., East Lansing, MI, USA", classification = "C5440 (Multiprocessing systems); C6110B (Software engineering techniques); C6110P (Parallel programming); C6140D (High level languages); C6150N (Distributed systems software)", keywords = "Algorithm computation; Algorithm phases; DaReL; Data decomposition; Data exchange; Data parallel Fortran languages; Distributed memory platforms; Distributed-memory machines; High Performance Fortran; HPF; IBM SP-1; Message passing standard; MPI primitives; Multi-dimensional data redistribution; Portable data redistribution library; Processor memories; Program overhead; Regular distribution patterns; Run-time data redistribution; Run-time data redistribution primitives", thesaurus = "Distributed memory systems; FORTRAN; Message passing; Parallel languages; Parallel programming; Software libraries; Software portability; Software standards; Subroutines", } @InProceedings{Katkere:1995:VBW, author = "A. Katkere and J. Schlenzig and R. Jain", title = "{VRML-Based WWW} Interface to {MPI} Video", crossref = "Nadeau:1995:SVR", pages = "25--31, 137", month = "????", year = "1995", bibdate = "Thu Mar 28 05:45:25 2002", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Visual Comput. Lab., California Univ., San Diego, La Jolla, CA, USA", keywords = "SGML; Virtual Reality Modeling Language; VRML", xxpages = "25--32", } @InProceedings{Kauranne:1995:OHM, author = "T. Kauranne and J. Oinonen and S. Saarinen and O. Serimaa and J. 
Hietaniemi", title = "The operational {HIRLAM} 2 model on parallel computers (weather forecasting)", crossref = "Hoffmann:1995:CAP", pages = "63--74", year = "1995", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Joensuu Univ., Finland", classification = "A9260X (Weather analysis and prediction); C4185 (Finite element analysis); C6110P (Parallel programming); C7340 (Geophysics computing)", keywords = "Atmosphere; Binary GRIB files; Distributed memory computers; Helmholtz equation solver; Maintainability; Message passing interface; Meteorology; Numerical model; Operational HIRLAM 2 model; Parallel programming; Portability; Reproducibility; Semi-implicit Eulerian finite difference method; Serial code noninterference; Transposition strategy; Weather forecasting", thesaurus = "Digital simulation; Distributed processing; Finite difference methods; Finite element analysis; Geophysics computing; Message passing; Numerical analysis; Parallel processing; Parallel programming; Software maintenance; Software portability; Weather forecasting", } @InProceedings{Klingebiel:1995:COD, author = "P. Klingebiel and R. Diekmann and U. Lefarth and M. Fischer and J. 
Seuss", title = "{CAMeL\slash PVM}: an open, distributed {CAE} environment for modelling and simulating mechatronic systems", crossref = "Breitenecker:1995:ESC", pages = "645--650", year = "1995", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Mechatronics Lab., Paderborn Univ., Germany", classification = "C6150N (Distributed systems software); C7440 (Civil and mechanical engineering computing)", keywords = "Ada tasking; Automatic load balancing procedures; CAMeL/PVM; Channels; Communication management; Computer-aided engineering design environment; Computer-Aided Mechatronic Laboratory; Computer-aided modelling; Heterogeneous workstation clusters; Mechatronic systems simulation; Message-passing environment; Open distributed CAE environment; Parallel Virtual Machine; Process management; Program modules; Unix-based extension", thesaurus = "Computer aided engineering; Digital simulation; Mechanical engineering computing; Mechatronics; Message passing; Open systems; Parallel processing; Resource allocation; Unix; Virtual machines", } @InProceedings{Klingebiel:1995:CPO, author = "P. Klingebiel and R. Diekmann and U. Lefarth and M. Fischer and J. 
Seuss", title = "{CAMeL\slash PVM}: An Open, Distributed {CAE} Environment for Modelling and Simulating Mechatronic Systems", crossref = "Breitenecker:1995:ESC", pages = "645--650", year = "1995", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C6150N (Distributed systems software); C7440 (Civil and mechanical engineering computing)", corpsource = "Mechatronics Lab., Paderborn Univ., Germany", keywords = "Ada tasking; aided modelling; automatic load balancing; CAMeL/PVM; channels; communication; computer aided engineering; computer-; Computer-Aided; computer-aided engineering design environment; digital simulation; engineering computing; extension; heterogeneous workstation clusters; management; mechanical; Mechatronic Laboratory; mechatronic systems simulation; mechatronics; message passing; message-passing environment; open; open distributed CAE environment; Parallel; parallel processing; procedures; process management; program modules; resource allocation; systems; Unix; Unix-based; Virtual Machine; virtual machines", pubcountry = "Netherlands", treatment = "P Practical", } @InProceedings{Kofakis:1995:DPI, author = "P. Kofakis and J. 
Louis", title = "Distributed parallel implementation of seismic algorithms", crossref = "Hassanzadeh:1995:MMG", journal = j-PROC-SPIE, volume = "2571", pages = "229--238", year = "1995", CODEN = "PSISDG", ISSN = "0277-786X (print), 1996-756X (electronic)", ISSN-L = "0277-786X", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "METHOD Ltd., Holargos, Greece", classification = "A9130F (Seismic waves); A9365 (Data and information acquisition, processing, storage and dissemination in geophysics); A9385 (Instrumentation and techniques for geophysical, hydrospheric and lower atmosphere research); C1180 (Optimisation techniques); C4130 (Interpolation and function approximation); C4170 (Differential equations); C5260 (Digital signal processing); C6110P (Parallel programming); C6150N (Distributed systems software); C7340 (Geophysics computing)", fjournal = "Proceedings of the SPIE --- The International Society for Optical Engineering", keywords = "Distributed parallel implementation; Eikonal equations; Fermat's principle; Finite difference extrapolation; First arrival; Heterogeneous workstations; Irregular grids; Minimum time ray-tracer; Parallel virtual machine; Seismic algorithms; Seismic waves; Travel times", thesaurus = "Distributed memory systems; Extrapolation; Finite difference methods; Geophysical signal processing; Local area networks; Minimisation; Parallel algorithms; Ray tracing; Seismic waves", } @Article{Koski:1995:STL, author = "Kimmo Koski", title = "A step towards large scale parallelism: {Building} a parallel computing environment from heterogeneous resources", journal = j-FUT-GEN-COMP-SYS, volume = "11", number = "4--5", pages = "491--498", month = aug, year = "1995", CODEN = "FGSEVI", ISSN = "0167-739X (print), 1872-7115 (electronic)", ISSN-L = "0167-739X", bibdate = "Fri Jul 15 09:06:06 MDT 2005", bibsource =
"ftp://ftp.ira.uka.de/bibliography/Parallel/pvm.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sciencedirect.com/science/journal/0167739X", acknowledgement = ack-nhfb, affiliation = "Centre for Sci. Comput., Espoo, Finland", classification = "C0200 (General computer topics); C5220P (Parallel architecture); C5440 (Multiprocessing systems); C6110P (Parallel programming); C6115 (Programming support); C6150C (Compilers, interpreters and other processors); C6150N (Distributed systems software)", fjournal = "Future Generation Computer Systems", journal-URL = "http://www.sciencedirect.com/science/journal/0167739X", keywords = "Center for Scientific Computing; Competition; Computer industry; Cray Future Generation MPP system; Efficient resource use; Heterogeneous resources; IBM SP2 distributed memory system; Large-scale parallelism; Load balancing; Massively parallel processing; Metacomputing; Parallel compiler technology; Parallel computing environment; Parallel programming; Parallel shared memory systems; Parallel tools selection; PVM clusters; RISC processors; Risks; Supported software tools; User base training; Vector system vendors", pubcountry = "Netherlands", thesaurus = "Cray computers; DP industry; Parallel processing; Parallelising compilers; Reduced instruction set computing; Resource allocation; Software tools; Training", } @Article{Kumar:1995:MWD, author = "S. Kumar and H. Adeli", title = "Minimum weight design of large structures on a network of workstations", journal = j-MICROCOMP-CIVIL-ENG, volume = "10", number = "6", pages = "423--432", month = nov, year = "1995", CODEN = "MCENE7", ISSN = "0885-9507", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. 
of Civil Eng., Ohio State Univ., Columbus, OH, USA", classification = "C1180 (Optimisation techniques); C4240P (Parallel programming and algorithm theory); C5620L (Local area networks); C6150N (Distributed systems software); C7440 (Civil and mechanical engineering computing)", fjournal = "Microcomputers in Civil Engineering", keywords = "Coarse-grained applications; Computational capability; Distributed algorithm; Genetic algorithms; Granularity; Large structures; Local area networks; Low cost; Message passing; Microprocessors; Minimum weight design; Optimization; Parallel Virtual Machine; Performance estimates; Software library; Structural optimization; Workstation network", thesaurus = "Distributed algorithms; Genetic algorithms; Local area networks; Message passing; Software libraries; Structural engineering computing", } @InProceedings{Leung:1995:EPE, author = "K.-C. Leung and M. Hamdi", title = "Evaluating {PVM} and {Express} on Various Network Clusters", crossref = "Alnuweiri:1995:PHF", pages = "57--66", year = "1995", bibdate = "Thu Feb 29 17:59:11 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Article{Li:1995:CPP, author = "Liwei Li and Paul S. Wang", title = "The {CL-PVM} Package", journal = j-SIGSAM, volume = "29", number = "3--4", pages = "2--8", month = dec, year = "1995", CODEN = "SIGSBZ", ISSN = "0163-5824 (print), 1557-9492 (electronic)", ISSN-L = "0163-5824", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C6110P (Parallel programming); C6150N (Distributed systems software)", corpsource = "Dept. of Math. and Comput.
Sci., Kent State Univ., OH, USA", fjournal = "SIGSAM Bulletin", keywords = "artificial intelligence systems; C programs; CL-PVM package; Common Lisp interface; console program; expert systems; Fortran 77 interface; hostfile; knowledge-based systems; library functions; LISP; Lisp top level; Lisp-based; machines; open systems; operating systems (computers); parallel; Parallel Virtual Machine; parallel/concurrent computing facility; programming; programs; PVM Library routines; run-time server; software libraries; software package; software packages; symbolic computation systems; virtual", treatment = "P Practical", } @Article{Lin:1995:DNC, author = "Mengjou Lin and J. Hsieh and D. H. C. Du and J. P. Thomas and J. A. MacDonald", title = "Distributed network computing over local {ATM} networks", journal = j-IEEE-J-SEL-AREAS-COMMUN, volume = "13", number = "4", pages = "733--748", month = may, year = "1995", CODEN = "ISACEM", DOI = "https://doi.org/10.1109/49.382163", ISSN = "0733-8716 (print), 1558-0008 (electronic)", ISSN-L = "0733-8716", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Adv. Technol. Group, Apple Comput. 
Inc., Cupertino, CA, USA", classification = "B0290H (Linear algebra); B0290P (Differential equations); B6150M (Protocols); B6210L (Computer communications); B6230 (Switching centres and equipment); C4140 (Linear algebra); C4170 (Differential equations); C5220P (Parallel architecture); C5620L (Local area networks); C5640 (Protocols); C5670 (Network performance); C6150J (Operating systems)", fjournal = "IEEE Journal on Selected Areas in Communications", keywords = "Application programming interfaces; ASX-100 ATM switch; Asynchronous transfer mode; ATM API; BSD socket programming interface; Communication performance; Communication protocol layer; Distributed network computing; Distributed programming; End-to-end communication; Fore Systems; High-speed local area networks; High-speed network standards; Local ATM network; Message passing library; Parallel matrix multiplication; Parallel virtual machine; Performance characteristics; Processors; Remote procedure call; Workstations", thesaurus = "Application program interfaces; Asynchronous transfer mode; Local area networks; Matrix multiplication; Partial differential equations; Performance evaluation; Pipeline processing; Protocols; Remote procedure calls", } @Article{Liu:1995:WCD, author = "Xiaomao Liu", title = "Workstations cluster for distributed supercomputing", journal = j-MINI-MICRO-SYSTEMS, volume = "16", number = "2", pages = "45--52", month = feb, year = "1995", CODEN = "XWJXEH", ISSN = "1000-1220", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "North China Inst. of Comput. 
Technol., Beijing, China", classification = "C5620L (Local area networks); C6150N (Distributed systems software)", fjournal = "Mini-Micro Systems", keywords = "Active message communication; Distributed supercomputing; Global UNIX; MPI; Workstations cluster", language = "Chinese", pubcountry = "China", thesaurus = "Distributed processing; Local area networks", } @InProceedings{Lou:1995:PIN, author = "J. Z. Lou", title = "A parallel incompressible {Navier--Stokes} solver with multigrid iterations", crossref = "Bailey:1995:PSS", pages = "167--168", year = "1995", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Jet Propulsion Lab., California Inst. of Technol., Pasadena, CA, USA", classification = "A0230 (Function theory, analysis); A0260 (Numerical approximation and analysis); A0270 (Computational techniques); A4710 (General fluid dynamics theory, simulation and other computational methods); C4170 (Differential equations); C4240P (Parallel programming and algorithm theory); C6110P (Parallel programming); C7320 (Physics and chemistry computing)", keywords = "Domain-decomposition strategy; Efficient finite-difference incompressible Navier--Stokes fluid; Intel Delta; Intel Paragon; Message-passing; Multigrid iterations; Multigrid scheme; Parallel incompressible Navier--Stokes solver; Rectangular processor meshes; Second-order projection method; Staggered grid; Template code", thesaurus = "Finite difference methods; Message passing; Navier--Stokes equations; Parallel algorithms; Physics computing", } @InProceedings{Ludwig:1995:PPF, author = "T. Ludwig and S. Lamberts", title = "{PFSLib} --- a parallel file system for workstation clusters", crossref = "Malyshkin:1995:PCT", pages = "246--251", year = "1995", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Inst. fur Inf., Tech. 
Univ. Munchen, Germany", classification = "C6110P (Parallel programming); C6120 (File organisation); C6150N (Distributed systems software)", keywords = "Parallel file system; Parallel programming environments; Performance results; PFS; PFSLib library; Workstation clusters", thesaurus = "File organisation; Parallel programming; Software performance evaluation", } @InProceedings{Lumsdaine:1995:WIM, author = "A. Lumsdaine and J. M. Squyres and M. W. Reichelt", title = "Waveform iterative methods for parallel solution of initial value problems", crossref = "IEEE:1995:PSP", pages = "88--97", year = "1995", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. Sci. and Eng., Notre Dame Univ., IN, USA", classification = "C4130 (Interpolation and function approximation); C4170 (Differential equations); C4240P (Parallel programming and algorithm theory); C5440 (Multiprocessing systems)", keywords = "Communication latency; Differential equations; Differential-algebraic equations; Initial value problems; Linear system; Message-passing; MOSFET simulation; MPI-based implementation; Parallel solution; Semiconductor device simulation program; Synchronization; Time dependent semiconductor drift-diffusion equations; Waveform iterative methods; Waveform relaxation; Workstations", thesaurus = "Circuit analysis computing; Differential equations; Digital simulation; Initial value problems; Iterative methods; Message passing; Parallel algorithms", } @InProceedings{Manke:1995:MPP, author = "J. W. Manke and J. C. Patterson", title = "Message passing performance of {Intel Paragon}, {IBM SP1} and {CRAY T3D} using {PVM}", crossref = "Bailey:1995:PSS", pages = "768--769", year = "1995", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Boeing Comput. 
Services, Seattle, WA, USA", classification = "C5440 (Multiprocessing systems); C5470 (Performance evaluation and testing); C6150N (Distributed systems software)", corpsource = "Boeing Comput. Services, Seattle, WA, USA", keywords = "all-to-all communication; All-to-all communication; Cray computers; CRAY T3D; distributed applications; Distributed applications; IBM computers; IBM SP1; Intel Paragon; message passing; Message passing performance; message passing time model; Message passing time model; MPP machines; multiprocessing systems; nodes; Nodes; performance; performance evaluation; PVM; recursive doubling; Recursive doubling; scalability; Scalability; speed; Speed; timing", thesaurus = "Cray computers; IBM computers; Message passing; Multiprocessing systems; Performance evaluation; Timing", treatment = "T Theoretical or Mathematical", } @InProceedings{Mantovani:1995:HPS, author = "M. L. Mantovani and M. Malagoli", title = "Highly parallel {SCF} calculation: the {SYSMO} Program", crossref = "IEEE:1995:PEW", pages = "502--507", year = "1995", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "CICAIA, Modena Univ., Italy", classification = "C4240P (Parallel programming and algorithm theory); C7320 (Physics and chemistry computing)", keywords = "Highly parallel SCF calculation; Linear Combination of Atomic Orbitals Self Consistent Field algorithm; Parallel implementation; Parallel Virtual Machine; Scalability; Single program multiple data level; SYSMO program; System Modena", thesaurus = "Chemistry; Chemistry computing; LCAO calculations; Parallel algorithms; SCF calculations", } @InProceedings{Martin:1995:DPC, author = "I. Martin and J. C. Fabero and F. Tirado and A. 
Bautista",
  title =        "Distributed Parallel Computers versus {PVM} on a Workstation Cluster in the Simulation of Time Dependent Partial Differential Equations",
  crossref =     "IEEE:1995:PEW",
  pages =        "20--26",
  year =         "1995",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Dept. de Inf. y Autom., Univ. Complutense de Madrid, Spain",
  classification = "A0270 (Computational techniques); A0340K (Waves and wave propagation: general mathematical aspects); C4170 (Differential equations); C6110P (Parallel programming)",
  corpsource =   "Dept. de Inf. y Autom., Univ. Complutense de Madrid, Spain",
  keywords =     "distributed parallel computers; equation; finite difference method; large-scale problems; message passing; numerical simulation; parallel; parallel algorithms; parallel computing; parallel machine; partial differential equations; performance evaluation; PVM; Schr{\"o}dinger; Schr{\"o}dinger equation; Sparc-stations; time dependent; wave equations; workstation cluster; Workstation cluster; Distributed parallel computers; Time dependent; Partial differential equations; Parallel numerical simulation; Message passing parallel machine; Finite difference method; Parallel computing; Large-scale problems",
  sponsororg =   "Euromicro; Assoc. Italiana per Inf. Calcolo Autom",
  thesaurus =    "Parallel algorithms; Performance evaluation; Schr{\"o}dinger equation; Wave equations",
  treatment =    "T Theoretical or Mathematical",
}

@Article{Matise:1995:PCG,
  author =       "T. C. Matise and M. D. Schroeder and D. M. Chiarulli and D. E.
Weeks",
  title =        "Parallel Computation of Genetic Likelihoods Using {CRI-MAP}, {PVM}, and a Network of Distributed Workstations",
  journal =      j-HUMAN-HEREDITY,
  volume =       "45",
  number =       "2",
  pages =        "103--??",
  month =        "????",
  year =         "1995",
  CODEN =        "HUHEAS",
  ISSN =         "0001-5652",
  ISSN-L =       "0001-5652",
  bibdate =      "Mon Jan 15 15:32:53 MST 1996",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Human Heredity",
}

@Article{Mattson:1995:PEP,
  author =       "Timothy G. Mattson",
  title =        "Programming Environments for Parallel and Distributed Computing: a Comparison of {P4}, {PVM}, {Linda}, and {TCGMSG}",
  journal =      j-IJSAHPC,
  volume =       "9",
  number =       "2",
  pages =        "138--161",
  month =        "Summer",
  year =         "1995",
  CODEN =        "IJSCFG",
  ISSN =         "1078-3482",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "Compendex database; https://www.math.utah.edu/pub/tex/bib/pvm.bib; UnCover library database",
  abstract =     "Parallel programmers must choose from a confusing array of parallel programming environments. When success has to be measured by the success of application-software development rather than theoretical research, the choice must be made quickly without the luxury of experimentation. In this paper, we help the programmer make this choice by looking closely at four of the most heavily used portable programming environments --- p4, PVM, TCGMSG, and Linda. For each of these programming environments, we look at three different programs: one that computes $\pi$ by numerical integration and two that benchmark communication performance.
The four programming environments are analyzed in terms of performance, support, ease of coding, and ease of debugging.", acknowledgement = ack-nhfb, affiliation = "Intel Corp", affiliationaddress = "Beaverton, OR, USA", classification = "722.3; 722.4; 723.1; 723.5; 921.6; C0310F (Software development management); C6110P (Parallel programming); C6115 (Programming support); C6150N (Distributed systems software)", corpsource = "Div. of Supercomputer Syst., INTEL Corp., Beaverton, OR, USA", fjournal = "International Journal of Supercomputer Applications and High Performance Computing", journalabr = "Int J Supercomput Appl High Perform Comput", keywords = "Application software development; application software development; communication; Communication benchmarks; Communication performance; Computer software; Data communication systems; Distributed computer systems; Distributed computing; distributed computing; Integration; Linda; Numerical integration; numerical integration; P4; p4; parallel; Parallel processing systems; Parallel programmers; parallel programmers; parallel programming; Parallel programming environments; Parallel virtual machine; performance; Pi calculation; pi calculation; portability; Portable programming environments; portable programming environments; Program debugging; Programming environments; programming environments; PVM; software; Software engineering; software reviews; TCGMSG", thesaurus = "Parallel programming; Programming environments; Software portability; Software reviews", treatment = "P Practical", } @InProceedings{Mehra:1995:AIM, author = "P. Mehra and B. {Van Voorst} and J. Yan", title = "Automated Instrumentation, Monitoring and Visualization of {PVM} Programs", crossref = "Bailey:1995:PSS", pages = "832--837", year = "1995", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Recom Technol. Inc., NASA Ames Res. 
Center, Moffett Field, CA, USA", classification = "C6110P (Parallel programming); C6115 (Programming support); C6150G (Diagnostic, testing, debugging and evaluating systems); C6150N (Distributed systems software)", corpsource = "Recom Technol. Inc., NASA Ames Res. Center, Moffett Field, CA, USA", keywords = "Automated; automated instrumentation; Automated instrumentation; Automated Instrumentation and Monitoring System; data visualisation; Instrumentation and Monitoring System; message passing; message-passing parallel programs; Message-passing parallel programs; monitoring; parallel programming; parallel programs; Parallel programs; Parallel Virtual Machine; performance-debugging; Performance-debugging toolkit; program debugging; program monitoring; Program monitoring; program visualization; Program visualization; programming environments; PVM programs; system; toolkit; visual programming; workstation networks; Workstation networks", thesaurus = "Data visualisation; Message passing; Parallel programming; Program debugging; Programming environments; System monitoring; Visual programming", treatment = "P Practical", } @Article{Michielse:1995:PMU, author = "Peter Michielse", title = "Parallel multigrid using {PVM}", journal = j-APPL-NUM-MATH, volume = "19", number = "1-2", pages = "63--69", month = nov, year = "1995", CODEN = "ANMAEL", ISSN = "0168-9274 (print), 1873-5460 (electronic)", ISSN-L = "0168-9274", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "Compendex database; https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "This paper discusses an implementation of a parallel multigrid method using Parallel Virtual Machine (PVM). The basics described here apply to general parallel computers, either with shared memory or distributed memory systems. The actual implementation has been performed on both type of systems, although we will focus on distributed memory systems in this paper. 
The distributed memory implementation has been done using PVM on Convex MetaSeries machines. The speed-up results will be analyzed with respect to computational work and communicational overhead.", acknowledgement = ack-nhfb, affiliation = "CONVEX Computer Corp", affiliationaddress = "Utrecht, Neth", classification = "512.1.1; 721.1; 722.1; 722.4; 723.5; 921.6; C4170 (Differential equations); C4240P (Parallel programming and algorithm theory); C5440 (Multiprocessing systems); C7310 (Mathematics computing)", corpsource = "CONVEX Comput. Corp., Utrecht, Netherlands", fjournal = "Applied Numerical Mathematics: Transactions of IMACS", journal-URL = "http://www.sciencedirect.com/science/journal/01689274", journalabr = "Appl Numer Math", keywords = "Approximation theory; communicational overhead; Computational methods; computational work; Computer simulation; Convex MetaSeries; Data storage equipment; differential equations; Distributed computer systems; distributed memory systems; Distributed memory systems; machines; mathematics computing; Numerical methods; parallel algorithms; parallel multigrid; Parallel multigrid; Parallel processing systems; parallel virtual machine; Parallel virtual machine; Petroleum reservoirs; Reservoir simulators; shared memory; Shared memory systems; systems", pubcountry = "Netherlands", treatment = "A Application; P Practical", } @Article{Mirvis:1995:HML, author = "Y. Mirvis and F. Abdi and B. Lajevardi and P. 
Murthy", title = "Hierarchical multi-level optimization solution for massive parallel simulation of composite system", journal = j-AIAA-ASME-ASCE-AHS-STRUCT-STRUCT-DYN-MAT-CONF, volume = "4", month = "????", year = "1995", CODEN = "CPSCDO", ISSN = "0273-4508", bibdate = "Fri May 24 09:58:00 MDT 1996", bibsource = "Compendex database; https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "Multilevel optimization techniques have been utilized to speed up simulation processing time for the analysis and modeling of high temperature composite structures. The Network Computing System (NCS) tools were utilized using the GENOA-Maestro, and Parallel Virtual Machine (PVM) toolkits for heterogeneous distributed computing, to make it portable across the range of parallel hardware architecture.", acknowledgement = ack-nhfb, affiliation = "Alpha STAR Research Corp", affiliationaddress = "Los Angeles, CA, USA", classification = "408.1; 415.4; 721.1; 722.4; 723.5; 731.1", conference = "Proceedings of the 36th AIAA\slash ASME\slash ASCE\slash AHS\slash ASC Structures, Structural Dynamics, and Materials Conference and AIAA\slash ASME Adaptive Structures Forum. Part 4 (of 5)", fjournal = "AIAA/ASME/ASCE/AHS Structures, Structural Dynamics \& Materials Conference --- Collection of Technical Papers", journalabr = "AIAA ASME ASCE AHS Struct Struct Dyn Mater Conf Collect Tech Pap", keywords = "Composite structures; Computational complexity; Computer aided analysis; Computer simulation; Constraint theory; Hierarchical systems; Mathematical models; Multilevel optimization technique; Network computing system (NCS); Optimization; Parallel processing systems; Parallel virtual machine (PVM); Software package GENOA-Maestro; Software package PVM; Structural analysis", meetingdate = "Apr 10--13 1995", } @InProceedings{Morinishi:1995:PIB, author = "K. Morinishi and N. 
Satofuka", title = "Parallel implementation of the {Boltzmann} equation solvers using {PVM}", crossref = "Satofuka:1995:PCF", pages = "339--346", year = "1995", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "A4710 (General fluid dynamics theory, simulation and other computational methods); A4745 (Rarefied gas dynamics); C4180 (Integral equations); C4240P (Parallel programming and algorithm theory); C7320 (Physics and chemistry computing); C7460 (Aerospace engineering computing)", corpsource = "Dept. of Mech. and Syst. Eng., Kyoto Inst. of Technol., Japan", keywords = "aerodynamics; aerospace; aerospace computing; BGK model; Boltzmann; Boltzmann collision integral; Boltzmann equation; CFD; computational; computing; digital simulation; equation solver; external flows; flow simulation; fluid dynamics; message; message passing software; NACA0012 airfoil; parallel programming; passing; physics computing; PVM; rarefied; rarefied gas flow", pubcountry = "Netherlands", treatment = "P Practical; T Theoretical or Mathematical", } @InProceedings{Mork:1995:DPP, author = "P. Mork", title = "Debugging parallel programs with execution tracing", crossref = "Ferenczi:1995:PAH", pages = "176--183", year = "1995", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. 
of Control Eng., Miskolc Univ., Hungary", classification = "C6110B (Software engineering techniques); C6110P (Parallel programming); C6115 (Programming support); C6130B (Graphics techniques); C6150G (Diagnostic, testing, debugging and evaluating systems); C6150N (Distributed systems software)", keywords = "CASE tool; Converter; Datafile; Execution tracing; Graphical tool; PACVIS; ParaGraph; Parallel program debugging; Parallel software engineering; Pvm3; Raw trace data transformation; SEPP project; Software Engineering for Parallel Processing project; Tools; Trace file visualization; Visualization program", thesaurus = "Computer aided software engineering; Data visualisation; Parallel programming; Program debugging; Software tools", } @Article{Morton:1995:LLP, author = "Don Morton and Kefei Wang and David O. Ogbe", title = "Lessons learned in porting {Fortran\slash PVM} code to the {Cray T3D}", journal = j-IEEE-PAR-DIST-TECH, volume = "3", number = "1", pages = "4--11", month = "Spring", year = "1995", CODEN = "IPDTEX", DOI = "https://doi.org/10.1109/88.384580", ISSN = "1063-6552 (print), 1558-1861 (electronic)", ISSN-L = "1063-6552", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "Compendex database; https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "With an extra work from the programmer, the Cray T3D offers low-level facilities for achieving substantial performance gains. Because of this, it is often necessary to consider the tradeoffs between performance gains and coding effort. Here, provided is a first-hand account of the issues in porting Fortran\slash PVM code to the Cray T3D. 
As a new evolving product, occasional problems with the T3D should be expected.",
  acknowledgement = ack-nhfb,
  affiliation =  "Cameron Univ",
  affiliationaddress = "Lawton, OK, USA",
  classification = "722.2; 722.3; 722.4; 723.1; 723.1.1; 723.2; C5440 (Multiprocessing systems); C6110B (Software engineering techniques); C6110P (Parallel programming); C6140D (High level languages)",
  fjournal =     "IEEE parallel and distributed technology: systems and applications",
  journalabr =   "IEEE Parallel Distrib Technol",
  keywords =     "Algorithms; Central processing unit; Code porting; Codes (symbols); Coding effort; coding effort; Computer aided software engineering; Computer software portability; Computer workstations; Cray computers; Cray T3D; Cray T3D computer; Data communication systems; Distributed computer systems; FORTRAN; Fortran (programming language); FORTRAN (programming language); Fortran/PVM code porting; Interfaces (computer); low-level facilities; Low-level facilities; message passing; parallel machines; parallel programming; Parallel virtual machine; performance gains; Performance gains; Program debugging; software portability; Software prototyping; Subroutines; Supercomputers",
  thesaurus =    "Cray computers; FORTRAN; Message passing; Parallel machines; Parallel programming; Software portability",
  treatment =    "P Practical",
}

@InProceedings{Nguyen:1995:SPI,
  author =       "D. Nguyen and B.
Hillberg",
  title =        "Simulations of Pinhole Imaging for {AXAF}: Distributed Processing Using the {MPI} Standard",
  crossref =     "Shaw:1995:ADA",
  pages =        "361--366 (or 361--363??)",
  year =         "1995",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "A9575P (Mathematical and computer techniques in astronomy); C6110P (Parallel programming); C6185 (Simulation techniques); C6150N (Distributed systems software); C7350 (Astronomy and astrophysics computing)",
  conftitle =    "Astronomical Data Analysis Software and Systems IV Meeting",
  corpsource =   "Smithsonian Astrophys. Obs., Cambridge, MA, USA",
  keywords =     "Application Programming Interface; astronomy computing; AXAF mission; digital simulation; distributed processing; LAM programming environment; Local Area Multicomputer; memory intensive task; message passing; Message Passing Interface; MPI standard; Ohio Supercomputer Center; parallel mode simulation; parallel processing; parallel programming; pinhole imaging; pinhole simulation program; sequential mode simulation; software packages; software portability; workstation cluster; X-ray astronomy",
  treatment =    "T Theoretical or Mathematical; A Application",
}

%%% NOTE: Novotny:1995:BPP and Novotny:1995:BRA share the same journal,
%%% volume, number, and starting page, and appear to describe the same
%%% book review; the month below is taken from Novotny:1995:BRA.

@Article{Novotny:1995:BPP,
  author =       "Mark Novotny",
  title =        "{BOOKS}: {PVM} --- Parallel Virtual Machine: a Users' Guide and Tutorial for Networked Parallel Computing",
  journal =      j-COMPUT-PHYS,
  volume =       "9",
  number =       "6",
  pages =        "607--??",
  month =        nov,
  year =         "1995",
  CODEN =        "CPHYE2",
  ISSN =         "0894-1866 (print), 1558-4208 (electronic)",
  ISSN-L =       "0894-1866",
  bibdate =      "Mon Jan 15 15:32:53 MST 1996",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Computers in Physics",
}

@Article{Novotny:1995:BRA,
  author =       "Mark Novotny and Susan McKay and Wolfgang Christian",
  title =        "Book Review: {Al Geist, Adam Beguelin, Jack Dongarra, Weicheng Jiang, Robert Manchek, and Vaidy Sunderam,
\booktitle{{PVM} --- Parallel Virtual Machine: a Users' Guide and Tutorial for Networked Parallel Computing}}", journal = j-COMPUT-PHYS, volume = "9", number = "6", pages = "607--??", month = nov, year = "1995", CODEN = "CPHYE2", DOI = "https://doi.org/10.1063/1.4823450", ISSN = "0894-1866 (print), 1558-4208 (electronic)", ISSN-L = "0894-1866", bibdate = "Wed Apr 10 08:45:57 MDT 2019", bibsource = "https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; https://www.math.utah.edu/pub/tex/bib/computphys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://aip.scitation.org/doi/10.1063/1.4823450", acknowledgement = ack-nhfb, ajournal = "Comput. Phys", fjournal = "Computers in Physics", journal-URL = "https://aip.scitation.org/journal/cip", } @InProceedings{Nupairoj:1995:PES, author = "N. Nupairoj and L. M. Ni", title = "Performance evaluation of some {MPI} implementations on workstation clusters", crossref = "IEEE:1995:PSP", pages = "98--105", year = "1995", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. Sci., Michigan State Univ., East Lansing, MI, USA", classification = "C5220P (Parallel architecture); C5440 (Multiprocessing systems); C5470 (Performance evaluation and testing); C6110B (Software engineering techniques); C6150N (Distributed systems software)", conftitle = "Proceedings Scalable Parallel Libraries Conference", corpsource = "Dept. of Comput. 
Sci., Michigan State Univ., East Lansing, MI, USA", keywords = "communication library; Communication library; distributed memory systems; distributed-memory computing systems; Distributed-memory computing systems; high performance computing; High performance computing; message passing; Message Passing Interface; message-passing; Message-passing; MPI implementations; MPI specification; performance evaluation; Performance evaluation; software libraries; standards; workstation clusters; Workstation clusters; workstations", sponsororg = "Mississippi State Univ.; NSF", thesaurus = "Distributed memory systems; Message passing; Performance evaluation; Software libraries; Standards; Workstations", treatment = "P Practical", } @Article{Oakley:1995:ADR, author = "D. R. Oakley and N. F. {Knight, Jr.} and D. D. Warner", title = "Adaptive dynamic relaxation algorithm for non-linear hyperelastic structures. {III}. {Parallel} implementation", journal = j-COMPUT-METH-APPL-MECH-ENG, volume = "126", number = "1-2", pages = "111--129", month = sep, year = "1995", CODEN = "CMMECC", ISSN = "0045-7825, 0374-2830", ISSN-L = "0045-7825", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Appl. Res. Assoc. 
Inc., Raleigh, NC, USA", classification = "C4185 (Finite element analysis); C4240P (Parallel programming and algorithm theory); C5440 (Multiprocessing systems); C6150N (Distributed systems software); C7440 (Civil and mechanical engineering computing)", fjournal = "Computer Methods in Applied Mechanics and Engineering", keywords = "128-Processor Intel hypercube; Adaptive dynamic relaxation algorithm; Efficient parallel implementation; Engineering workstation cluster; Frictionless contact; Interprocessor communication; Nonlinear hyperelastic structures; Nonlinear static analysis; Parallel-processing resource; PVM; Relative speedups; Scalability; Three-dimensional hyperelastic systems; Two-dimensional hyperelastic systems", pubcountry = "Netherlands", thesaurus = "Adaptive systems; Elasticity; Engineering workstations; Finite element analysis; Hypercube networks; Local area networks; Parallel algorithms; Relaxation; Structural engineering computing", } @InProceedings{Olszewski:1995:TCC, author = "Luke Olszewski", title = "A timing comparison of the conjugate gradient and {Gauss--Seidel} parallel algorithms in a one-dimensional flow equation using {PVM}", crossref = "ACM:1995:PAS", pages = "205--212", year = "1995", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "Compendex database; https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "The development of parallel processing came about due to the ineffectiveness of a single processor to accommodate the solutions of large scale problems in a reasonable amount of time. In this paper, we shall introduce one such problem, and discuss the implementation of two parallel algorithms applied to the linear approximations. This study will illustrate how an approximation method which has a faster rate of convergence may not necessarily produce the best solution time.", acknowledgement = ack-nhfb, affiliation = "Dept. of Math. and Comput. 
Sci., Georgia Southern Univ.",
  affiliationaddress = "Statesboro, GA, USA",
  classification = "631.1; 722.4; 723.1; 723.5; 921.1; 921.6; C4130 (Interpolation and function approximation); C4140 (Linear algebra); C4240P (Parallel programming and algorithm theory); C7310 (Mathematics computing); C7340 (Geophysics computing)",
  conference =   "Proceedings of the 33rd Annual Southeast Conference",
  corpsource =   "Dept. of Math. and Comput. Sci., Georgia Southern Univ., Statesboro, GA, USA",
  journalabr =   "Proc Annu Southeast Conf",
  keywords =     "Approximation theory; approximations; Computational methods; Conjugate gradient; conjugate gradient; conjugate gradient methods; Conjugate gradient parallel algorithms; Convergence; convergence; convergence of numerical; Convergence of numerical methods; Flow of fluids; Gauss Seidel parallel algorithms; Gauss--Seidel parallel algorithm; geophysics computing; hydrology; iterative methods; Large scale problems; large scale problems; Large scale systems; linear; Linear approximations; Mathematical models; mathematics computing; methods; Numerical methods; One dimensional flow equation; One-dimensional flow equation; one-dimensional flow equation; Parallel algorithms; parallel algorithms; Parallel processing; parallel processing; Parallel processing systems; Parallel virtual machine; PVM; Richards equation; Timing; timing; Timing comparison",
  meetingaddress = "Clemson, SC, USA",
  meetingdate =  "Mar 17--18 1995",
  meetingdate2 = "03/17--18/95",
  sponsororg =   "ACM",
  thesaurus =    "Conjugate gradient methods; Convergence of numerical methods; Geophysics computing; Hydrology; Iterative methods; Mathematics computing; Parallel algorithms",
  treatment =    "T Theoretical or Mathematical",
}

@InProceedings{Ouenes:1995:PRA,
  author =       "A. Ouenes and W. W. Weiss and J. A. Sultan and J.
Anwar", title = "Parallel Reservoir Automatic History Matching Using a Network of Workstations and {PVM}", crossref = "Anonymous:1995:RSS", pages = "125--134", year = "1995", bibdate = "Thu Feb 29 17:59:11 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Panda:1995:GRW, author = "D. K. Panda", title = "Global reduction in wormhole k-ary n-cube networks with multidestination exchange worms", crossref = "IEEE:1995:PIP", pages = "652--659", year = "1995", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. and Inf. Sci., Ohio State Univ., Columbus, OH, USA", classification = "C4230M (Multiprocessor interconnection); C5220P (Parallel architecture); C5440 (Multiprocessing systems)", keywords = "Barrier synchronization operations; Communication startup time; Complete global reduction; Data size; Fast global reduction; Global reduction; Message passing interface standard; Multidestination exchange worms; Multidestination message passing mechanism; Pairwise exchange worms; System size; Unicast-based message passing; Wormhole k-ary n-cube networks", thesaurus = "Hypercube networks; Message passing; Synchronisation", } @InProceedings{Panda:1995:IDE, author = "D. K. Panda", title = "Issues in designing efficient and practical algorithms for collective communication on wormhole-routed systems", crossref = "Agrawal:1995:PIW", pages = "8--15", year = "1995", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. and Inf. 
Sci., Ohio State Univ., Columbus, OH, USA", classification = "C4240P (Parallel programming and algorithm theory); C5220P (Parallel architecture); C5470 (Performance evaluation and testing); C6150N (Distributed systems software)", keywords = "Collective communication; Collective communication operations; Communication types; Message Passing Interface; MPI standard; Performance evaluation; Practical algorithms; Scalable parallel systems; Wormhole routed systems; Wormhole-routed systems", thesaurus = "Message passing; Multiprocessor interconnection networks; Parallel algorithms; Parallel machines", } @InProceedings{Pennington:1995:DHC, author = "R. L. Pennington", title = "Distributed and heterogeneous computing", crossref = "Vandoni:1995:CSC", pages = "25--57", year = "1995", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Pittsburgh Supercomputing Centre, PA, USA", classification = "C6110P (Parallel programming); C6150N (Distributed systems software)", keywords = "C; Distributed computing; Fortran; Heterogeneous computing; Message passing; Programming; PVM", thesaurus = "Message passing; Parallel machines; Parallel programming; Virtual machines", } @InProceedings{Periyathamby:1995:NSG, author = "U. Periyathamby and B. C. Khoo and K. S. Yeo and Q. X. 
Wang",
  title = "A Numerical Simulation of the Growth and Collapse of Vapour Cavity Near a Free Surface on Distributed Computing Through {PVM}",
  crossref = "Bilger:1995:AFM",
  pages = "815--818",
  year = "1995",
  bibdate = "Wed Aug 14 09:02:28 MDT 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}

@Article{Pfenning:1995:OCP,
  author = "J{\"o}rg-Thomas Pfenning and Christoph Moll",
  title = "Optimized communication patterns on workstation clusters",
  journal = j-PARALLEL-COMPUTING,
  volume = "21",
  number = "3",
  pages = "373--388",
  day = "10",
  month = mar,
  year = "1995",
  CODEN = "PACOEJ",
  ISSN = "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L = "0167-8191",
  bibdate = "Fri Aug 6 10:14:24 MDT 1999",
  bibsource = "http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_free/browse/browse.cgi?year=1995&volume=21&issue=3; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_sub/browse/browse.cgi?year=1995&volume=21&issue=3&aid=964",
  acknowledgement = ack-nhfb,
  affiliation = "Koln Univ., Germany",
  classification = "C4240P (Parallel programming and algorithm theory); C5220P (Parallel architecture); C5440 (Multiprocessing systems); C6110P (Parallel programming); C6150N (Distributed systems software)",
  corpsource = "Koln Univ., Germany",
  fjournal = "Parallel Computing",
  journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
  keywords = "Data parallel programming model; Dynamic loop scheduling algorithm; FDDI-ring; High startup latencies; Limited communication bandwidth; Matrix multiplication; Network usage; Optimized communication patterns; Parallel communications architecture; PVM message passing library; Sequential communication; Sparse communication patterns; Workstation clusters",
  pubcountry = "Netherlands",
  thesaurus = "Message passing; Parallel programming; Scheduling; Workstations",
}

@Article{Piscaglia:1995:DOC,
  author = "P. Piscaglia and B. Macq and P. Maes",
  title = "Distributed optimization of codebooks",
  journal = j-SIGNAL-PROCESS-IMAGE-COMMUN,
  volume = "7",
  number = "3",
  pages = "211--223",
  month = sep,
  year = "1995",
  CODEN = "SPICEF",
  ISSN = "0923-5965 (print), 1879-2677 (electronic)",
  ISSN-L = "0923-5965",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Univ. Catholique de Louvain, Belgium",
  classification = "B6120B (Codes); B6140C (Optical information, image and video signal processing); B6210L (Computer communications); C5260B (Computer vision and image processing techniques); C5440 (Multiprocessing systems); C5620L (Local area networks)",
  fjournal = "Signal Processing: Image Communication",
  keywords = "Codebooks; Codebooks optimization; Communication bandwidth minimisation; Computer network; Distributed optimization; Failure robustness; General-purpose workstations; Image processing algorithms; LBG algorithm; Load balancing; Parallel virtual machine; Processors synchronisation; Specialized library",
  pubcountry = "Netherlands",
  thesaurus = "Image coding; Local area networks; Parallel machines; Virtual machines; Workstations",
}

@InProceedings{Plank:1995:ADC,
  author = "J. S. Plank and Youngbae Kim and J. J. Dongarra",
  title = "Algorithm-based diskless checkpointing for fault tolerant matrix operations",
  crossref = "IEEE:1995:DPT",
  pages = "351--360",
  year = "1995",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Dept. of Comput.
Sci., Tennessee Univ., TN, USA",
  classification = "C4130 (Interpolation and function approximation); C4140 (Linear algebra); C6110B (Software engineering techniques); C6150N (Distributed systems software); C7300 (Natural sciences computing)",
  keywords = "Algorithm-based diskless checkpointing; Cholesky factorization; Distributed scientific computations; Fault tolerant matrix operations; Fault-tolerance; High-performance implementations; IBM SP2; Long-running scientific computations; Low overhead; LU factorization; Performance; Preconditioned conjugate gradient; Processors; PVM networks; QR factorization; SUN workstations; Workstation network platform",
  thesaurus = "Conjugate gradient methods; Local area networks; Matrix algebra; Natural sciences computing; Software fault tolerance; Subroutines; Workstations",
}

@InProceedings{Prasad:1995:PPB,
  author = "S. K. Prasad and K. M. Yu",
  title = "Performance of a {PVM-based} optimistic simulation testbed on different parallel architectures",
  crossref = "Hamza:1995:PII",
  pages = "511--514",
  year = "1995",
  bibdate = "Wed Apr 16 06:39:19 MDT 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C5220P (Parallel architecture); C5440 (Multiprocessing systems); C5470 (Performance evaluation and testing); C6185 (Simulation techniques); C7430 (Computer engineering)",
  corpsource = "Dept. of Math. and Comput. Sci., Georgia State Univ., Atlanta, GA, USA",
  keywords = "architectures; bus-based shared-memory; discrete event simulation; dynamic time; hypercube networks; hypercube-based parallel computer; local-memory; message granularity; multiprocessor; nCUBE-II; optimistic discrete event simulation testbed; Parallel; parallel; performance evaluation; PVM-based optimistic simulation testbed; RS-6000; shared memory; Silicon Graphics 4D/GTX; systems; Unix workstations; Virtual Machine package; virtual machines; window",
  sponsororg = "IASTED; ISMM",
  treatment = "X Experimental",
}

@InProceedings{Puskas:1995:LBW,
  author = "Z. Puskas",
  title = "Load Balancing on Workstation Clusters Using {PVM}",
  crossref = "Ferenczi:1995:PAH",
  pages = "112--123",
  year = "1995",
  bibdate = "Wed Apr 16 06:39:19 MDT 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Dept. of Meas. and Instrum. Eng., Tech. Univ. Budapest, Hungary",
  classification = "C5620L (Local area networks); C6110P (Parallel programming); C6150J (Operating systems); C6150N (Distributed systems software)",
  corpsource = "Dept. of Meas. and Instrum. Eng., Tech. Univ. Budapest, Hungary",
  keywords = "capacity sharing; computational; Computational capacity sharing; distributed; distributed parallel system; Distributed parallel system; Distributed programming; load balancing; Load balancing; local area networks; network operating systems; parallel; Parallel Virtual Machine; processor-farm technique; Processor-farm technique; programming; Programming; programming models; Programming models; PVM; resource allocation; workstation clusters; Workstation clusters; workstations",
  pubcountry = "Hungary",
  thesaurus = "Local area networks; Network operating systems; Parallel programming; Resource allocation; Workstations",
  treatment = "P Practical",
}

@InProceedings{Qaddouri:1995:MFS,
  author = "A. Qaddouri and R. Roy and B.
Goulard",
  title = "Multigroup flux solvers using {PVM} [{Parallel Virtual Machine}]",
  crossref = "ANS:1995:MCR",
  volume = "2",
  pages = "1554--1562",
  year = "1995",
  bibdate = "Wed Apr 16 06:39:19 MDT 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "A2820H (Neutron diffusion); C7470 (Nuclear engineering computing)",
  corpsource = "Inst. de Genie Nucleaire, Ecole Polytech. de Montreal, Que., Canada",
  keywords = "collision probability; distributed memory; iterative; multigroup flux solvers; neutron transport theory; nuclear engineering computing; parallel processing; Parallel Virtual Machine; PVM; time-independent transport equation",
  sponsororg = "ANS; Eur. Nucl. Soc.; Atomic Energy Soc. Japan",
  treatment = "T Theoretical or Mathematical",
}

@MastersThesis{Qu:1995:FAS,
  author = "Su Qu",
  title = "Feature-driven area-based stereo matching method on {PVM}",
  type = "M.S. thesis",
  school = inst-UGA,
  address = inst-UGA:adr,
  pages = "x + 110",
  year = "1995",
  bibdate = "Mon Jan 15 15:32:53 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "Directed by Hamid R. Arabnia.",
  acknowledgement = ack-nhfb,
}

@InProceedings{Rambu:1995:DSS,
  author = "N. Rambu and S. Stefan and D. Borsan and S. Talpos",
  title = "A diagnostic study of some meteorological fields simulated with {UKMO} and {MPI} atmospheric general circulation models",
  crossref = "Gates:1995:PFI",
  pages = "493--498",
  year = "1995",
  bibdate = "Wed Aug 14 09:02:28 MDT 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}

@Article{Ramon:1995:PKV,
  author = "J. Ramon and P. Pena",
  title = "Parallelization of {KENO-Va Monte Carlo} code",
  journal = j-COMP-PHYS-COMM,
  volume = "88",
  number = "1",
  pages = "76--82",
  month = jul,
  year = "1995",
  CODEN = "CPHCBZ",
  DOI = "https://doi.org/10.1016/0010-4655(95)00025-B",
  ISSN = "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L = "0010-4655",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm1990.bib; https://www.math.utah.edu/pub/tex/bib/prng.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://www.sciencedirect.com/science/article/pii/001046559500025B",
  acknowledgement = ack-nhfb,
  affiliation = "Consejo de Seguridad Nucl., Madrid, Spain",
  classification = "A2820H (Neutron diffusion); A2846E (Nuclear criticality safety); A2846G (Packaging and transportation of nuclear materials); C6110P (Parallel programming); C7470 (Nuclear engineering computing)",
  fjournal = "Computer Physics Communications",
  journal-URL = "http://www.sciencedirect.com/science/journal/00104655",
  keywords = "CONVEX C3440; Criticality; Distributed memory systems; FDDI network; Fuel storage pools; HP9000/735; KENO-Va code; Message-passing interface; Monte Carlo code; Parallelization; PVM; Random numbers; SCALE system; Shared memory machines; Shipping casks; Transport equation",
  pubcountry = "Netherlands",
  thesaurus = "Monte Carlo methods; Neutron transport theory; Nuclear criticality safety; Nuclear engineering computing; Nuclear materials packaging; Parallel programming",
}

@InProceedings{Ratha:1995:CUC,
  author = "N. K. Ratha and A. K. Jain and M. J. Chung",
  title = "Clustering using a coarse-grained parallel genetic algorithm: a preliminary study",
  crossref = "Cantoni:1995:CCA",
  pages = "331--338",
  year = "1995",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Dept. of Comput. Sci., Michigan State Univ., East Lansing, MI, USA",
  classification = "C1180 (Optimisation techniques); C1250 (Pattern recognition); C4240P (Parallel programming and algorithm theory); C6150N (Distributed systems software)",
  keywords = "Coarse grained parallel genetic algorithm; Coarse-grained parallel genetic algorithm; Complex optimization problems; Data sets; Distributed algorithm; Distributed implementation; Divide and conquer approach; GAs; Near linear speedup; Optimal minimum squared error partition; Optimization problem; Pattern clustering; Preliminary study; PVM; Standard communication library; Standard K-means clustering algorithm; Workstation cluster",
  thesaurus = "Distributed algorithms; Divide and conquer methods; Genetic algorithms; Pattern recognition; Problem solving",
}

@InProceedings{Ratha:1995:DED,
  author = "N. K. Ratha and T. Acar and M. Gokmen and A. K. Jain",
  title = "A distributed edge detection and surface reconstruction algorithm",
  crossref = "Cantoni:1995:CCA",
  pages = "149--154",
  year = "1995",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Dept. of Comput.
Sci., Michigan State Univ., East Lansing, MI, USA",
  classification = "B0260 (Optimisation techniques); B6140C (Optical information, image and video signal processing); C1180 (Optimisation techniques); C1250 (Pattern recognition); C4240P (Parallel programming and algorithm theory); C5260B (Computer vision and image processing techniques)",
  keywords = "Associated energy functional; Cluster of workstations; Distributed edge detection; Graduated non-convexity; Image compression; Image restoration; Line process; Optimal edge assignment; Pixel gray values; PVM communication library; Regularization techniques; Scalable parallel algorithm; Surface reconstruction algorithm; Weak membrane",
  thesaurus = "Computer vision; Edge detection; Image restoration; Parallel algorithms; Simulated annealing; Surface reconstruction",
}

@InProceedings{Reinefeld:1995:PVE,
  author = "A. Reinefeld and V. Schnecke",
  title = "Portability versus efficiency? Parallel applications on {PVM} and {Parix}",
  crossref = "Fritzson:1995:PPA",
  pages = "35--49",
  year = "1995",
  bibdate = "Wed Apr 16 06:39:19 MDT 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C1160 (Combinatorial mathematics); C1180 (Optimisation techniques); C5620L (Local area networks); C5440 (Multiprocessing systems); C6110B (Software engineering techniques); C6110P (Parallel programming); C6115 (Programming support); C6150N (Distributed systems software)",
  corpsource = "Center for Parallel Comput., Paderborn Univ., Germany",
  keywords = "algorithm architecture; benchmarks; combinatorial mathematics; computing; efficiency; high-level programming environment; LAN; local area networks; massively parallel transputer system; moderately parallel Parsytec GC/PowerPlus; optimisation; parallel algorithms; parallel applications; parallel high-performance; parallel programming; Parix programming model; performance; portability; portable programming models; processors; programming environments; PVM programming model; software performance evaluation; software portability; system; T805; transputer systems; Unix; UNIX workstation cluster; workstations",
  pubcountry = "Netherlands",
  treatment = "P Practical",
}

@Article{Reynders:1995:OOO,
  author = "John V. W. Reynders and David W. Forslund and Paul J. Hinker and Marydell Tholburn and David G. Kilman and William F. Humphrey",
  title = "{OOPS}: an object-oriented particle simulation class library for distributed architectures",
  journal = j-COMP-PHYS-COMM,
  volume = "87",
  number = "1--2",
  pages = "212--224",
  day = "2",
  month = may,
  year = "1995",
  CODEN = "CPHCBZ",
  DOI = "https://doi.org/10.1016/0010-4655(94)00172-X",
  ISSN = "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L = "0010-4655",
  bibdate = "Mon Feb 13 21:29:54 MST 2012",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm1990.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://www.sciencedirect.com/science/article/pii/001046559400172X",
  acknowledgement = ack-nhfb,
  affiliation = "Adv. Comput. Lab., Los Alamos Nat. Lab., NM, USA",
  classification = "A0270 (Computational techniques); A0520G (Classical ensemble theory); C5220P (Parallel architecture); C6110J (Object-oriented programming); C6110P (Parallel programming); C7320 (Physics and chemistry computing)",
  fjournal = "Computer Physics Communications",
  journal-URL = "http://www.sciencedirect.com/science/journal/00104655",
  keywords = "Connection Machine CM5; CRI T3D; Distributed architectures; Materials science; Object-oriented particle simulation class library; OOPS; Plasma physics; Porous media; Portable code; PVM clusters; Suspension flows; Uniform high-level interface; Vortex simulations",
  pubcountry = "Netherlands",
  thesaurus = "C listings; Digital simulation; Multiprocessing programs; Object-oriented programming; Parallel architectures; Physics computing; Software libraries; Software portability; Statistical mechanics",
}

@Article{Ross:1995:DCM,
  author = "D. L.
Ross and J. S. Collins and J. H. George",
  title = "A dynamic capacity model using concurrent processing",
  journal = j-NEURAL-PAR-SCI-COMPUT,
  volume = "3",
  number = "2",
  pages = "249--262",
  month = jun,
  year = "1995",
  CODEN = "NPACEM",
  ISSN = "1061-5369",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Dept. of Math., Embry-Riddle Aeronaut. Univ., Daytona Beach, FL, USA",
  classification = "C1180 (Optimisation techniques); C5220P (Parallel architecture); C5440 (Multiprocessing systems); C7430 (Computer engineering); C7460 (Aerospace engineering computing)",
  fjournal = "Neural, Parallel and Scientific Computations",
  keywords = "Airport capacity optimisation; Concurrent processing; Concurrent programming; Dynamic capacity model; Iterative method; National Airspace System; Optimal capacity profiles; Parallel virtual machine; Public domain software PVM; Time-varying index",
  thesaurus = "Aerospace computing; Optimisation; Parallel processing; Public domain software; Virtual machines",
}

@Article{Schafers:1995:TGP,
  author = "L. Schafers and C. Scheidler and O. Kramer-Fuhrmann",
  title = "{TRAPPER}: a graphical programming environment for parallel systems",
  journal = j-FUT-GEN-COMP-SYS,
  volume = "11",
  number = "4--5",
  pages = "351--361",
  month = aug,
  year = "1995",
  CODEN = "FGSEVI",
  ISSN = "0167-739X (print), 1872-7115 (electronic)",
  ISSN-L = "0167-739X",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Dept. of Res. and Technol, Daimler-Benz AG, Berlin, Germany",
  classification = "C6110P (Parallel programming); C6110V (Visual programming); C6115 (Programming support); C6130B (Graphics techniques); C6150G (Diagnostic, testing, debugging and evaluating systems); C6150N (Distributed systems software)",
  fjournal = "Future Generation Computer Systems",
  journal-URL = "http://www.sciencedirect.com/science/journal/0167739X",
  keywords = "Communicating sequential processes; Communication loads; Computation loads; Configtool; Designtool; Graphical programming environment; Graphical representation; High computing power; Hybrid program development; Industrial applications; Interprocess communication; Mapping; Monitoring system; Optimization; Parallel process structure; Parallel systems; Software event recording; System design; Target hardware configuration; Textual representations; TRAPPER; Visualization",
  pubcountry = "Netherlands",
  thesaurus = "Communicating sequential processes; Computer animation; Data visualisation; Local area networks; Parallel machines; Parallel programming; Programming environments; Software tools; System monitoring; Transputer systems; Visual programming",
}

@InProceedings{Schuster:1995:CSM,
  author = "G. Schuster and F. Breitenecker",
  title = "Coupling Simulators with the Model Interconnection Concept and {PVM}",
  crossref = "Breitenecker:1995:ESC",
  pages = "321--326",
  year = "1995",
  bibdate = "Wed Apr 16 06:39:19 MDT 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "ARGE Simulation News, Tech. Univ. of Vienna, Austria",
  classification = "C6140D (High level languages); C6150N (Distributed systems software); C6185 (Simulation techniques)",
  corpsource = "ARGE Simulation News, Tech. Univ.
of Vienna, Austria",
  keywords = "ACSL; communication; continuous simulation; Continuous simulation systems; digital simulation; message passing; message passing system PVM; Message passing system PVM; model interconnection concept; Model interconnection concept; MOSIS; Mosis; program; Program communication; PVM; simulation languages; systems",
  pubcountry = "Netherlands",
  thesaurus = "Digital simulation; Message passing; Simulation languages",
  treatment = "P Practical",
}

@Article{Sekharan:1995:LBM,
  author = "Chandra N. Sekharan and Vineet Goel and R. Sridhar",
  title = "Load balancing methods for ray tracing and binary tree computing using {PVM}",
  journal = j-PARALLEL-COMPUTING,
  volume = "21",
  number = "12",
  pages = "1963--1978",
  day = "12",
  month = dec,
  year = "1995",
  CODEN = "PACOEJ",
  ISSN = "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L = "0167-8191",
  bibdate = "Fri Aug 06 18:01:04 1999",
  bibsource = "Compendex database; http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_free/browse/browse.cgi?year=1995&volume=21&issue=12; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_sub/browse/browse.cgi?year=1995&volume=21&issue=12&aid=1028",
  abstract = "We propose efficient load balancing methods for two computational problems namely ray tracing and bottom-up binary tree computing in a distributed environment. In the context of ray tracing, we propose a variant of a static load balancing technique presented in [15] where the sampling is based on partitioning the object space. Our approach partitions the image instead and uses an efficient scheduling technique for load balancing. Computations carried out on a binary tree arise naturally in image processing and network optimization problems. Many of these problems are solved efficiently in parallel by the popular tree contraction technique [1]. In this paper, we explore the tree-contraction technique in a distributed setting using the grain packing method [9]. Implementations of our algorithms on a cluster of workstations using Parallel Virtual Machine (PVM) [6] demonstrate near-perfect load balancing.",
  acknowledgement = ack-nhfb,
  affiliation = "Loyola Univ of Chicago",
  affiliationaddress = "Chicago, IL, USA",
  classification = "721.1; 722.3; 722.4; 723.1; 723.2; 921.4; C1160 (Combinatorial mathematics); C4240P (Parallel programming and algorithm theory); C6130B (Graphics techniques)",
  corpsource = "Dept. of Math. and Comput. Sci., Loyola Univ., Chicago, IL, USA",
  fjournal = "Parallel Computing",
  journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
  journalabr = "Parallel Comput",
  keywords = "Algorithms; Binary tree computing; binary tree computing; Computational complexity; Computer graphics; Computer networks; Computer workstations; distributed algorithms; Distributed computer systems; Distributed environment; Grain packing methods; Image processing; Load balancing; load balancing; Machine; network optimization; Optimization; Parallel processing systems; Parallel Virtual; Parallel virtual machine; partitioning; PVM; Ray tracing; ray tracing; Resource allocation; resource allocation; Scheduling; scheduling technique; Tree contraction technique; Trees (mathematics); trees (mathematics); Workstation cluster",
  pubcountry = "Netherlands",
  treatment = "T Theoretical or Mathematical",
}

@Article{Shen:1995:PSM,
  author = "H. Shen",
  title = "Parallel $k$-set mutual range-join in hypercubes",
  journal = j-MICROPROC-MICROPROG,
  volume = "41",
  number = "7",
  pages = "443--448",
  month = nov,
  year = "1995",
  CODEN = "MMICDT",
  ISSN = "0165-6074 (print), 1878-7061 (electronic)",
  ISSN-L = "0165-6074",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Sch. of Comput. and Inf. Technol., Griffith Univ., Nathan, Qld., Australia",
  classification = "C4230M (Multiprocessor interconnection); C4240P (Parallel programming and algorithm theory); C5220P (Parallel architecture); C5470 (Performance evaluation and testing); C5670 (Network performance)",
  fjournal = "Microprocessing and Microprogramming",
  keywords = "Data comparisons; Hypercubes; Mutual range-join; Parallel algorithm; Parallel k-set mutual range-join; Performance; Permutation-based range-join; PVM; Tuples; Worst case",
  pubcountry = "Netherlands",
  thesaurus = "Hypercube networks; Parallel algorithms; Performance evaluation",
}

@InProceedings{Siegelin:1995:BPW,
  author = "C. Siegelin and U. Finger and C. O'Donnell",
  title = "Boosting the performance of workstations through {WARPmemory}",
  crossref = "Haridi:1995:EPP",
  pages = "703--706",
  year = "1995",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Dept. Inf., Ecole Nat. Superieure des Telecommun., Paris, France",
  classification = "C5310 (Storage system design); C5540 (Terminals and graphic displays); C5620L (Local area networks); C6110P (Parallel programming); C6120 (File organisation); C6150N (Distributed systems software)",
  keywords = "Improved workstation performance; Local network; Parallel program execution; Performance optimization; Physically shared memory; PVM; Running system; Serially multiported memory; Standard programming interface; WARPmemory; Workstation network",
  thesaurus = "Application program interfaces; Local area networks; Memory architecture; Message passing; Parallel programming; Shared memory systems; Workstations",
}

@InProceedings{Silva:1995:PCR,
  author = "L. M. Silva and J. G. Silva and S. Chapple and L.
Clarke",
  title = "Portable checkpointing and recovery",
  crossref = "IEEE:1995:PFI",
  pages = "188--195",
  year = "1995",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Dept. de Engenharia Inf., Coimbra Univ., Portugal",
  classification = "C6110B (Software engineering techniques); C6150J (Operating systems)",
  keywords = "Data-reconfiguration; F CHIMP/MPI; Flexible recovery mechanism; Parallel library; Portability; Portable checkpointing; Recovery",
  thesaurus = "Operating systems [computers]; Parallel machines; Software portability; System recovery",
}

%%% NOTE(review): Simmunovic:1995:MIP appears to duplicate
%%% Simunovic:1995:MIP, which follows it (same co-authors, crossref,
%%% and pages; the title differs only in letter case, and the first
%%% author's surname is spelled differently).  Confirm the correct
%%% spelling before merging or removing either entry; both keys are
%%% kept here because existing documents may cite either one.

@InProceedings{Simmunovic:1995:MIP,
  author = "S. Simmunovic and T. Zacharia and N. Baltas and D. B. Spalding",
  title = "{MPI} Implementation of {Phoenics}: a General Purpose Computational Fluid Dynamics Code",
  crossref = "Tentner:1995:HPC",
  pages = "122--127",
  year = "1995",
  bibdate = "Wed Aug 14 09:02:28 MDT 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}

@InProceedings{Simunovic:1995:MIP,
  author = "S. Simunovic and T. Zacharia and N. Baltas and D. B. Spalding",
  title = "{MPI} implementation of {PHOENICS}: a general purpose computational fluid dynamics code",
  crossref = "Tentner:1995:HPC",
  pages = "122--127",
  year = "1995",
  bibdate = "Sat Apr 19 16:34:54 MDT 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "A4725Q (Convection and heat transfer); A4770F (Chemically reactive flows); A8240 (Chemical kinetics and reactions: special regimes); C5220P (Parallel architecture); C5440 (Multiprocessing systems); C6110B (Software engineering techniques); C6150G (Diagnostic, testing, debugging and evaluating systems); C6150N (Distributed systems software); C6185 (Simulation techniques); C7320 (Physics and chemistry computing)",
  conftitle = "Proceedings High Performance Computing `95",
  corpsource = "Oak Ridge Nat. Lab., TN, USA",
  keywords = "chemical reactions; chemically reactive flow; chemistry computing; computational analysis programs; digital simulation; dynamical reaction process simulation; EARTH parallel version; flow simulation; fluid dynamics; fluid flow simulation; general purpose computational fluid dynamics code; heat transfer; heat transfer simulation; heterogeneous computer networks; high performance computing; Intel Paragon XP/S 35; Intel Paragon XP/S 5; Kendall Square Research; large scale computational simulations; massively parallel supercomputers; message passing; Message Passing Interface standard; MPI libraries; multiprocessing systems; multiprocessor SGI Onyx computer; parallel architectures; parallel machines; PHOENICS; physics computing; portable computational tool; program testing; scalable performance; software packages; software performance evaluation; software portability",
  sponsororg = "SCS",
  treatment = "P Practical",
}

@Article{Sitsky:1995:IPM,
  author = "D. Sitsky and D. Walsh and C. Johnson",
  title = "Implementation and performance of the {MPI} message passing interface on the {Fujitsu AP1000} multicomputer",
  journal = j-AUSTRALIAN-COMP-SCI-COMM,
  volume = "17",
  number = "1",
  pages = "475--481",
  month = "????",
  year = "1995",
  CODEN = "ACSCDD",
  ISSN = "0157-3055",
  bibdate = "Sat Apr 19 16:34:54 MDT 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Dept. of Comput. Sci., Australian Nat. Univ., Canberra, ACT, Australia",
  classification = "C6110P (Parallel programming); C6150N (Distributed systems software); C6155 (Computer communications software)",
  conflocation = "Glenelg, SA, Australia; 1-3 Feb. 1995",
  conftitle = "Eighteenth Australasian Computer Science Conference. ACSC'95",
  corpsource = "Dept. of Comput. Sci., Australian Nat. Univ., Canberra, ACT, Australia",
  fjournal = "Australian Computer Science Communications",
  keywords = "application program interfaces; benchmarks; Benchmarks; broadcasting; clustered systems; Clustered systems; collective routines; Collective routines; computer communications software; Fujitsu AP1000 multicomputer; group-wide broadcast; Group-wide broadcast; hardware operations; Hardware operations; implementation; Implementation; message passing; Message Passing Interface; MPI; multiprocessing systems; native calls; Native calls; operating system; Operating system; parallel libraries; Parallel libraries; parallel programming; performance; Performance; portability; Portability; selective broadcast operation; Selective broadcast operation; software libraries; software performance evaluation",
  pubcountry = "Australia",
  thesaurus = "Application program interfaces; Broadcasting; Computer communications software; Message passing; Multiprocessing systems; Parallel programming; Software libraries; Software performance evaluation",
  treatment = "P Practical",
}

@InProceedings{Sivaraman:1995:PSP,
  author = "H. Sivaraman and C. S. Raghavendra",
  title = "Parallelizing sequential programs to a cluster of workstations",
  crossref = "Agrawal:1995:PIW",
  pages = "38--41",
  year = "1995",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Dept. of Electr. Eng. and Comput.
Sci., Washington State Univ., Pullman, WA, USA",
  classification = "C5440 (Multiprocessing systems); C6115 (Programming support); C6150C (Compilers, interpreters and other processors)",
  keywords = "ADAPTOR; ADDT; Automatic parallelization system; AZTEC; Benchmark programs; Cluster of workstations; Data distribution tool; GUI; HPF compiler; Parafrase-2 parallelizing compiler; PVM; Sequential programs parallelisation; Source file",
  thesaurus = "Parallel processing; Parallelising compilers; Software tools; Workstations",
}

@Article{Skjellum:1995:EAM,
  author = "Anthony Skjellum and Ewing Lusk and William Gropp",
  title = "Early applications in the {Message-Passing Interface} ({MPI})",
  journal = j-IJSAHPC,
  volume = "9",
  number = "2",
  pages = "79--94",
  month = "Summer",
  year = "1995",
  CODEN = "IJSCFG",
  ISSN = "1078-3482",
  bibdate = "Sat Apr 19 16:34:54 MDT 1997",
  bibsource = "Compendex database; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract = "We describe a number of early efforts to make use of the Message-Passing Interface (MPI) standard in applications, based on an informal survey conducted in May-June, 1994. Rather than a definitive statement of all MPI developmental work, this paper addresses the initial successes, progress, and impressions that application developers have had with MPI, according to the responses received. We summarize the important aspects of each survey response, and draw conclusions about the spread of MPI into applications. An understanding of message passing and access to the MPI standard are prerequisites for appreciating this paper. Some background material is provided to ease this requirement.",
  acknowledgement = ack-nhfb,
  affiliation = "Mississippi State Univ",
  affiliationaddress = "Mississippi State, MS, USA",
  classification = "722.2; 722.3; 722.4; 902.2; C6150N (Distributed systems software)",
  corpsource = "Dept. of Comput. Sci., Mississippi State Univ., MS, USA",
  fjournal = "International Journal of Supercomputer Applications and High Performance Computing",
  journalabr = "Int J Supercomput Appl High Perform Comput",
  keywords = "Application developers; application developers; Computer hardware; Data communication systems; message passing; Message passing interface (MPI); Message-Passing Interface; MPI standard; Network protocols; software engineering; software standards; Standards; Survey; survey; User interfaces",
  thesaurus = "Message passing; Software engineering; Software standards",
  treatment = "P Practical",
}

@InProceedings{Skjellum:1995:EMP,
  author = "A. Skjellum and N. E. Doss and K. Viswanathan and A. Chowdappa and P. V. Bangalore",
  title = "Extending the message passing interface ({MPI})",
  crossref = "IEEE:1995:PSP",
  pages = "106--118",
  year = "1995",
  bibdate = "Sat Apr 19 16:34:54 MDT 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Dept. of Comput. Sci., Mississippi State Univ., MS, USA",
  classification = "C5440 (Multiprocessing systems); C6150N (Distributed systems software)",
  conftitle = "Proceedings Scalable Parallel Libraries Conference",
  corpsource = "Dept. of Comput. Sci., Mississippi State Univ., MS, USA",
  keywords = "computer networks; Computer networks; Europe; high performance computing; High performance computing; intercommunicator extensions; Intercommunicator extensions; message passing; message passing interface; Message passing interface; message passing standard; Message passing standard; MPI Forum; multicomputers; Multicomputers; multinational vendors; Multinational vendors; national laboratories; National laboratories; processor scheduling; research centers; Research centers; scheduling; Scheduling; standards; United States; universities; Universities; workstations; Workstations",
  sponsororg = "Mississippi State Univ.; NSF",
  thesaurus = "Computer networks; Message passing; Processor scheduling; Standards",
  treatment = "P Practical",
}

@Article{Smith:1995:CRC,
  author = "K. A. Smith and A. J. Baratta and G. E. Robinson",
  title = "Coupled {RELAP5} and {CONTAIN} Accident Analysis Using {PVM}",
  journal = j-NUCLEAR-SAFETY,
  volume = "36",
  number = "1",
  pages = "94--108",
  month = jan # "--" # jun,
  year = "1995",
  CODEN = "NUSAAZ",
  ISSN = "0029-5604",
  bibdate = "Wed Apr 16 06:39:19 MDT 1997",
  bibsource = "Compendex database; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract = "This article describes the development of an integrated accident analysis capability considering both reactor vessel and containment system responses. This integrated package, which uses the RELAP5 and CONTAIN computer codes, provides the user with greater accuracy and modeling flexibility when compared with accident analyses using these codes separately. Multiprocessing, together with message-passing-based data transfer, enables these concurrent RELAP5 and CONTAIN calculations. The data transfer facilitates the coupling between the reactor vessel and containment portions of the calculation.
The Parallel Virtual Machine software system running on a network of IBM RISC System\slash 6000 workstations provided the multiprocessing capabilities required for this work. The results of an anticipated-transient-without-scram scenario for a boiling-water reactor nuclear power plant are provided. For the scenario analyzed, the containment temperatures and pressures that were predicted on the basis of the stand-alone codes and standard analysis methods were lower (i.e., less conservative) than those predicted with the use of the integrated code package.", acknowledgement = ack-nhfb, affiliation = "Oak Ridge Natl Lab", affiliationaddress = "Oak Ridge, TN, USA", classification = "621; 641.1; 723.2; 723.5; 914.1; 921.6; A2841C (Computer codes for fission reactor theory and design); A2844 (Fission reactor protection systems, safety and accidents); A2850G (Light water reactors); C6150N (Distributed systems software); C7470 (Nuclear engineering computing)", corpsource = "Oak Ridge Nat. Lab., TN, USA", fjournal = "Nuclear safety", journalabr = "Nucl Saf", keywords = "anticipated-; Boiling water reactors; boiling-water reactor; BWR; Calculations; capability; Codes (symbols); Computer aided analysis; computer codes; concurrent RELAP5/CONTAIN calculations; CONTAIN computer codes; containment; containment system responses; containment temperatures; Containment vessels; coupled RELAP5/CONTAIN accident analysis; Data transfer; engineering computing; engineering workstations; fission; fission reactor accidents; fission reactor design; IBM computers; IBM RISC System/6000; instruction set computing; integrated accident analysis; integrated package; Machine software system; message passing; message-passing-based data transfer; modeling flexibility; multiprocessing; multiprocessing capabilities; multiprocessing programs; nuclear; nuclear power plant; nuclear power stations; Nuclear reactor accidents; Parallel processing systems; Parallel Virtual; Parallel virtual machine software 
system; Pressure; pressures; reactor containment; reactor vessel; Reactor vessel and containment system; reduced; RELAP5 computer codes; software packages; stand-alone codes; Temperature; transient-without-scram scenario; workstations", treatment = "P Practical; T Theoretical or Mathematical", } @InProceedings{Stagg:1995:IPN, author = "A. K. Stagg and D. D. Cline and G. F. Carey", title = "Implementing a parabolized {Navier--Stokes} flow solver on the {Cray T3D}", crossref = "Bailey:1995:PSS", pages = "143--148", year = "1995", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Jet Propulsion Lab., Cray Res. Inc., Pasadena, CA, USA", classification = "C4170 (Differential equations); C4240P (Parallel programming and algorithm theory); C5440 (Multiprocessing systems); C6150N (Distributed systems software); C7310 (Mathematics computing)", keywords = "Cray T3D; Globally addressable memory; Hyperbolic parabolic system; Interprocessor communication routines; Large-scale simulation; Library calls; Massively parallel architectures; Massively parallel computers; Message passing; Parabolized Navier--Stokes flow solver; Parallel Virtual Machine; Performance; Performance results", thesaurus = "Cray computers; Hyperbolic equations; Mathematics computing; Message passing; Navier--Stokes equations; Parabolic equations; Parallel algorithms; Parallel machines; Software performance evaluation", } @InProceedings{Stals:1995:AMP, author = "L. Stals", title = "Adaptive multigrid in parallel", crossref = "Bailey:1995:PSS", pages = "367--372", year = "1995", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Australian Nat. 
Univ., Canberra, ACT, Australia", classification = "C4170 (Differential equations); C4185 (Finite element analysis); C6110P (Parallel programming); C6150N (Distributed systems software); C7310 (Mathematics computing)", keywords = "Adaptive multigrid; Adaptive refinement methods; C++; Elliptic partial differential equations; Finite element method; Kernighan-Lin method; Load balancing; MIMD architectures; Multigrid methods; Multigrid programs; Newest node bisection; Parallel multigrid; Polygonal region; PVM; Square domains; Structured grids; Uniform grids; Unstructured grids", thesaurus = "Elliptic equations; Finite element analysis; Mathematics computing; Parallel machines; Parallel programming; Partial differential equations; Resource allocation", } @InProceedings{Stankovski:1995:MPA, author = "Z. Stankovski", title = "A Massively Parallel Algorithm for the Collision Probability Calculations in the {APOLLO-II} Code Using the {PVM} Library", crossref = "ANS:1995:MCR", volume = "2", pages = "1573--1583", year = "1995", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "A2820H (Neutron diffusion); A2841C (Computer codes for fission reactor theory and design); C6110P (Parallel programming); C7470 (Nuclear engineering computing)", corpsource = "Dept. de Mecanique et Technol., Commissariat a l'Energie Atomique, Gif sur Yvette, France", keywords = "APOLLO-II code; collision probability; host/node programmation model; massively parallel algorithm; message passing; neutron transport; neutron transport theory; nuclear engineering computing; parallel algorithms; parallel programming; parallelization; PVM library", sponsororg = "ANS; Eur. Nucl. Soc.; Atomic Energy Soc. Japan", treatment = "T Theoretical or Mathematical", } @InProceedings{Stathopoulos:1995:DLB, author = "A. Stathopoulos and A. 
Ynnerman", title = "Dynamic load balancing of atomic structure programs on a {PVM} cluster", crossref = "Hertzberger:1995:HPM", pages = "384--391", year = "1995", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. Sci., Vanderbilt Univ., Nashville, TN, USA", classification = "C5440 (Multiprocessing systems); C5470 (Performance evaluation and testing); C6150J (Operating systems)", corpsource = "Dept. of Comput. Sci., Vanderbilt Univ., Nashville, TN, USA", keywords = "allocation; atomic data; Atomic data; atomic structure programs; Atomic structure programs; cluster; dedicated cluster of; Dedicated cluster of workstations; dynamic load balancing; Dynamic load balancing; machine; MCHF package; parallel processing; parallel virtual; Parallel virtual machine; perfect load balancing; Perfect load balancing; performance evaluation; PVM; PVM cluster; resource; workstations", pubcountry = "Germany", thesaurus = "Parallel processing; Performance evaluation; Resource allocation", treatment = "A Application; P Practical", } @Article{Stellner:1995:CMP, author = "G. Stellner and M. Schumann and M. Girnghuber", title = "Comparing message-passing libraries with the {SPY} analysis environment", journal = j-IT-IT, volume = "37", number = "2", pages = "46--52", month = apr, year = "1995", CODEN = "ITINEV", ISSN = "0944-2774", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Inst. fur Inf., Tech. Univ. 
Munchen, Germany", classification = "C5440 (Multiprocessing systems); C6110P (Parallel programming); C6115 (Programming support)", fjournal = "Informationstechnik und technische Informatik: IT + TI", keywords = "Computational hard problems; Message-passing libraries; Multi-user environments; Networks of workstations; NXLib; P4; PVM; SPY analysis environment; Virtual parallel computer", language = "German", pubcountry = "Germany", thesaurus = "Message passing; Parallel processing; Parallel programming; Programming environments", } @InProceedings{Stubbs:1995:ICE, author = "S. S. Stubbs and D. L. Carver", title = "{IPCC++}: a {C++} extension for interprocess communication with objects", crossref = "IEEE:1995:PNA", pages = "205--210", year = "1995", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. Sci., Louisiana State Univ., Baton Rouge, LA, USA", classification = "C6110J (Object-oriented programming); C6110P (Parallel programming); C6140D (High level languages); C6150N (Distributed systems software); C7430 (Computer engineering)", keywords = "Asynchronous communication; C++ extension; Concurrency primitives; Distributed memory models; Dynamic process creation; Explicit concurrency; Inheritance; Inter-object concurrency; Interprocess communication objects; IPCC++; Language model; Object-oriented programming languages; Orthogonality; Parallel Virtual Machine; PVM; Selective waiting; Socket-based application program interface; Static process creation; Synchronous communication; Typed message passing system; UNIX interprocess communication system calls abstraction", thesaurus = "C language; Distributed memory systems; Inheritance; Message passing; Object-oriented languages; Parallel programming; Unix; Virtual machines", } @InProceedings{Sunderam:1995:RIH, author = "V. S. 
Sunderam", title = "Recent initiatives in heterogeneous parallel computing", crossref = "Gray:1995:PCT", pages = "1--16", year = "1995", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Math. and Comput. Sci., Emory Univ., Atlanta, GA, USA", classification = "C4240P (Parallel programming and algorithm theory); C5440 (Multiprocessing systems); C5470 (Performance evaluation and testing); C6110P (Parallel programming); C6150N (Distributed systems software)", keywords = "Concurrent computing; Concurrent distributed computing; Distributed computing; Heterogeneous parallel computing; Lightweight process; Parallel virtual machine; Performance enhancement; Performance evaluation; PVM; Research initiative; Thread", thesaurus = "Open systems; Parallel processing; Performance evaluation; Virtual machines", } @InProceedings{Suresh:1995:IOP, author = "H. Suresh", title = "Implementation of an optimal parallel algorithm for arithmetic expression parsing", crossref = "Narashimhan:1995:IIF", volume = "2", pages = "925 (vol. 2)", year = "1995", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Fac. of Sci. and Technol., Griffith Univ., Brisbane, Qld., Australia", classification = "C4240P (Parallel programming and algorithm theory); C5230 (Digital arithmetic methods); C6110P (Parallel programming); C6150C (Compilers, interpreters and other processors)", keywords = "Arithmetic expression parsing; Concurrent processing environment; Optimal parallel algorithm; Parallel computer architectures; PVM; SIMD parallel architecture; Simple recursive descent parser", thesaurus = "Digital arithmetic; Parallel algorithms; Parallelising compilers", } @InProceedings{Suresh:1995:PIQ, author = "H. 
Suresh", title = "{PVM} implementation of quadtree building algorithms on {SIMD} hypercube system", crossref = "Narashimhan:1995:IIF", volume = "2", pages = "855--858 (vol. 2)", year = "1995", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Sch. of Microelectron. Eng., Griffith Univ., Brisbane, Qld., Australia", classification = "C1160 (Combinatorial mathematics); C4240P (Parallel programming and algorithm theory); C5260B (Computer vision and image processing techniques); C6110P (Parallel programming); C6120 (File organisation)", corpsource = "Sch. of Microelectron. Eng., Griffith Univ., Brisbane, Qld., Australia", keywords = "algorithms; bilevel; Bilevel images; DOS operating system; Hierarchical data structures; hierarchical data structures; hypercube networks; hypercube system; IBM compatible PCs; image processing; images; Linear quadtrees; linear quadtrees; parallel algorithms; Parallel Virtual Machine System; Parallel Virtual Machine System Software; PVM implementation; Quadtree building algorithms; quadtree building algorithms; quadtrees; SIMD; SIMD hypercube system; Single Instruction Multiple Data hypercube; Single Instruction Multiple Data hypercube algorithms; Software", sponsororg = "Parallel Algorithms, Archit. and Software Eng. Res. Lab.; IEEE; IEEE Comput. Soc.; ACM; Euromicro; IBM; Instn. Eng. Australia; Inst. Radio and Electron. Eng. Soc.; Australian Comput. Soc", thesaurus = "Hypercube networks; Image processing; Parallel algorithms; Quadtrees", treatment = "P Practical", } @Article{Swanson:1995:PAP, author = "Eric Swanson and Terry P. 
Lybrand", title = "{PVM-AMBER}: a parallel implementation of the {AMBER} molecular mechanics package for workstation clusters", journal = j-J-COMPUT-CHEM, volume = "16", number = "9", pages = "1131--1140", month = sep, year = "1995", CODEN = "JCCHDD", DOI = "https://doi.org/10.1002/jcc.540160907", ISSN = "0192-8651 (print), 1096-987X (electronic)", ISSN-L = "0192-8651", bibdate = "Thu Nov 29 14:54:31 MST 2012", bibsource = "http://www.interscience.wiley.com/jpages/0192-8651; https://www.math.utah.edu/pub/tex/bib/jcomputchem1990.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Center for Bioeng., Washington Univ., Seattle, WA, USA", classification = "A3620C (Macromolecular conformation (statistics and dynamics)); A8710 (General, theoretical, and mathematical biophysics); A8715D (Physical chemistry of biomolecular solutions; condensed states); A8715H (Biomolecular dynamics, molecular probes, molecular pattern recognition); C6150N (Distributed systems software); C6185 (Simulation techniques); C7320 (Physics and chemistry computing); C7330 (Biology and medical computing)", corpsource = "Center for Bioeng., Washington Univ., Seattle, WA, USA", fjournal = "Journal of Computational Chemistry", journal-URL = "http://onlinelibrary.wiley.com/journal/10.1002/(ISSN)1096-987X", keywords = "AMBER molecular mechanics package parallel version; biology computing; biomolecular simulation; Biomolecular simulation problems; Computational speedup; computational speedup; computations; Data exchange; data exchange; digital simulation; efficiency; Ethernet; FDDI; FDDI connections; free energy; Free-energy perturbation computations; free-energy perturbation computations; intermolecular mechanics; Lipid bilayer systems; lipid bilayer systems; lipid bilayers; local area; molecular biophysics; molecular dynamics; Molecular dynamics computations; molecular dynamics method; networks; Nonbonded energies; nonbonded energies; Nonbonded forces; 
nonbonded forces; Nonbonded pair list generation; nonbonded pair list generation; packages; parallel; Parallel efficiency; parallel processing; peptide; perturbation theory; problems; Processor synchronization; processor synchronization; Protein; protein; proteins; PVM message-passing software; PVM-AMBER; Silicon Graphics; software; solvated; Solvated peptide; Test simulations; test simulations; Unix; Unix workstations; Workstation clusters; workstation clusters; workstations", onlinedate = "7 Sep 2004", thesaurus = "Biology computing; Digital simulation; FDDI; Free energy; Intermolecular mechanics; Lipid bilayers; Local area networks; Molecular biophysics; Molecular dynamics method; Parallel processing; Perturbation theory; Proteins; Software packages; Unix; Workstations", treatment = "P Practical", } @InProceedings{Ten:1995:TPE, author = "S. V. Ten and V. V. Savchenko and A. A. Pasko", title = "Time performance evaluation of implicit surface polygonization on distributed systems", crossref = "Gray:1995:PCT", pages = "183--193", year = "1995", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Distributed Parallel Process. 
Lab., Aizu Univ., Aizu-Wakamatsu City, Japan", classification = "C4240P (Parallel programming and algorithm theory); C4260 (Computational geometry); C6130B (Graphics techniques); C6150N (Distributed systems software); C7310 (Mathematics computing); C7400 (Engineering computing)", keywords = "CAD system; Complex surfaces; Distributed systems; Functions; Implicit functions; Implicit surface polygonization; Mathematics; Parallelization; Polygonal approximation; PVM system; Rendering; Scalable algorithm; Software algorithm; Solids; Time performance evaluation; Toroidal architecture; Transputer network; Visual analysis", thesaurus = "CAD; Computational geometry; Data visualisation; Engineering graphics; Functions; Mathematics computing; Message passing; Parallel algorithms; Parallel architectures; Rendering [computer graphics]; Software performance evaluation; Transputer systems", } @InProceedings{Tsunekawa:1995:EIE, author = "H. Tsunekawa", title = "Effective implementation of {EDEM} workstation cluster using {PVM}", crossref = "Pahl:1995:CCB", pages = "503--508", year = "1995", bibdate = "Thu Feb 29 17:59:11 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Uhl:1995:AWA, author = "A. Uhl", title = "Adapted wavelet analysis on moderate parallel distributed memory {MIMD} architectures", crossref = "Ferreira:1995:PAI", pages = "275--283", year = "1995", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Res. Inst. 
for Softwaretechnol., Salzburg Univ., Austria", classification = "B0230 (Integral transforms); B0290Z (Other numerical methods); B6120B (Codes); B6140C (Optical information, image and video signal processing); C1130 (Integral transforms); C4190 (Other numerical methods); C4240P (Parallel programming and algorithm theory); C5220P (Parallel architecture); C5260B (Computer vision and image processing techniques); C6150N (Distributed systems software)", keywords = "Adapted wavelet analysis; Algorithm efficiency; Decomposition; Image compression; Moderate parallel distributed memory MIMD architectures; PVM; Subband based parallelization; Wavelet packet best basis selection; Workstation cluster", thesaurus = "Distributed memory systems; Image coding; Parallel algorithms; Parallel architectures; Wavelet transforms", } @InProceedings{Uhl:1995:PCC, author = "A. Uhl", title = "Parallel Compact Coding of Satellite Images with Wavelet Packets using {PVM}", crossref = "Prasanna:1995:FIP", pages = "382--387", year = "1995", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "B6120B (Codes); B6140C (Optical information, image and video signal processing); C4190 (Other numerical methods); C5260B (Computer vision and image processing techniques); C5440 (Multiprocessing systems); C6110P (Parallel programming); C6130 (Data handling techniques); C7460 (Aerospace engineering computing)", corpsource = "Res. Inst. 
for Softwaretechnol., Salzburg Univ., Austria", keywords = "aerospace computing; artificial satellites; compression; data; image; image coding; image compression methods; parallel; parallel approach; parallel compact coding; parallel machines; parallel programming environment; processing; programming; PVM; quality; satellite data; satellite images; wavelet packet decomposition; wavelet packet decompositions; wavelet packets; wavelet transforms", pubcountry = "India", treatment = "P Practical", } @InProceedings{Uhl:1995:VPW, author = "A. Uhl", title = "Vector and parallel wavelet transforms for the analysis of time-varying signals", crossref = "Bailey:1995:PSS", pages = "9--14", year = "1995", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "RIST, Salzburg Univ., Austria", classification = "C1130 (Integral transforms); C4190 (Other numerical methods); C4240P (Parallel programming and algorithm theory); C5260 (Digital signal processing); C5440 (Multiprocessing systems); C5620L (Local area networks); C6110P (Parallel programming); C6150N (Distributed systems software)", keywords = "Asynchronous task pool; Continuous wavelet transform; Convex C3440 Vectorcomputer; Load balancing; Master-slave programming scheme; Parallel wavelet transforms; PVM; Speed-up; Time-varying signal analysis; Timing; Vector wavelet transforms; Workstation cluster", thesaurus = "Local area networks; Parallel algorithms; Parallel programming; Resource allocation; Signal processing; Time-varying systems; Timing; Vector processor systems; Wavelet transforms; Workstations", } @Article{Vaughan:1995:MPM, author = "Paula L. Vaughan and Anthony Skjellum and Donna S. 
Reese and Fei-Chen Cheng", title = "Migrating from {PVM} to {MPI}, part {I}: The {Unify} system", journal = j-FRONTIERS-MASS-PAR-COMP-CONF-PROC, pages = "488--495", month = "????", year = "1995", bibdate = "Fri May 24 09:57:40 MDT 1996", bibsource = "Compendex database; https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "IEEE catalog number 95TH8024.", abstract = "A new kind of portability system for modifying the PVM message passing system to generate the Message Passing Interface (MPI) standard notation for message passing is described. The system, known as Unify, is designed to reduce the effort of learning MPI while providing a sensible means to make use of MPI libraries and MPI calls. It also allows the immediate use of MPI-based parallel libraries in applications.", acknowledgement = ack-nhfb, affiliation = "Mississippi State Univ", affiliationaddress = "Mississippi State, MS, USA", classification = "721.1; 722.3; 722.4; 723.2; 902.2; 921.6", conference = "Proceedings of the 5th Symposium on the Frontiers of Massively Parallel Computation", fjournal = "Frontiers of Massively Parallel Computation --- Conference Proceedings", journalabr = "Front Massively Parallel Comput Conf Proc", keywords = "Computational linguistics; Computer software portability; Computer workstations; Data communication systems; Data handling; Data structures; Database systems; Interfaces (computer); Mathematical models; Message passing; Message Passing Interface; Parallel processing systems; Standards", meetingaddress = "McLean, VA, USA", meetingdate = "Feb 6--9 1995", meetingdate2 = "02/06--09/95", sponsor = "IEEE Computer Society", } @Article{Vincent:1995:HPP, author = "James J. Vincent and Kenneth M. 
{Merz Jr.}", title = "A highly portable parallel implementation of {AMBER4} using the message passing interface standard", journal = j-J-COMPUT-CHEM, volume = "16", number = "11", pages = "1420--1427", month = nov, year = "1995", CODEN = "JCCHDD", DOI = "https://doi.org/10.1002/jcc.540161110", ISSN = "0192-8651 (print), 1096-987X (electronic)", ISSN-L = "0192-8651", bibdate = "Thu Nov 29 14:54:32 MST 2012", bibsource = "http://www.interscience.wiley.com/jpages/0192-8651; https://www.math.utah.edu/pub/tex/bib/jcomputchem1990.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Chem., Pennsylvania State Univ., University Park, PA, USA", classification = "A3620 (Macromolecules and polymer molecules); A6120J (Computer simulation of static and dynamic liquid behaviour); A8715 (Molecular biophysics); C5220P (Parallel architecture); C7320 (Physics and chemistry computing)", corpsource = "Dept. of Chem., Pennsylvania State Univ., University Park, PA, USA", fjournal = "Journal of Computational Chemistry", journal-URL = "http://onlinelibrary.wiley.com/journal/10.1002/(ISSN)1096-987X", keywords = "AMBER4; Cray C90; Cray T3D; free energy; free-energy perturbation module Gibbs; Free-energy perturbation module Gibbs; IBM SP1/SP2; lipid bilayer molecular dynamics simulation; Lipid bilayer molecular dynamics simulation; macromolecular modeling package; Macromolecular modeling package; macromolecules; message passing; message passing interface standard; Message passing interface standard; MINMD; molecular biophysics; molecular dynamics method; molecular dynamics/minimization module; Molecular dynamics/minimization module; networked workstations; Networked workstations; perturbation theory; physics computing; portable parallel implementation; Portable parallel implementation", onlinedate = "7 Sep 2004", thesaurus = "Free energy; Macromolecules; Message passing; Molecular biophysics; Molecular dynamics method; Perturbation theory; 
Physics computing", treatment = "T Theoretical or Mathematical", } @MastersThesis{Viswanathan:1995:PCM, author = "Kishore Viswanathan", title = "A parallel client-server model for distributed computing", type = "M.S. thesis", school = "Department of Computer Science, " # inst-MSU, address = inst-MSU:adr, pages = "vii + 79", year = "1995", bibdate = "Mon Jan 15 16:53:06 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, alttitle = "Distributed computing. Message Passing Interface Forum (MPIF) MPI-Forum 1994", keywords = "Client/server computing.; Electronic data processing --- Distributed processing; Mississippi State University --- Thesis --- (1995); Parallel programming (computer science)", } @InProceedings{Vlassov:1995:MEP, author = "V. Vlassov and H. Ahmed and L.-E. Thorelli", title = "{mEDA-2}: An Extension of {PVM}", crossref = "Malyshkin:1995:PCT", pages = "288--293", year = "1995", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Electrum 204, R. Inst. of Technol., Kista, Sweden", classification = "C6110P (Parallel programming); C6115 (Programming support); C6150N (Distributed systems software)", corpsource = "Electrum 204, R. Inst. 
of Technol., Kista, Sweden", keywords = "communication; EDA model; environments; intertask; Intertask communication; mEDA-2; MEDA-2; message passing; Message passing; parallel program; Parallel program termination; parallel programming; parallel programs; Parallel programs; programming; programming environments; Programming environments; PVM; shared memory systems; synchronisation; synchronization; Synchronization; termination; virtual shared memory; Virtual shared memory; VSM", pubcountry = "Germany", thesaurus = "Message passing; Parallel programming; Programming environments; Shared memory systems; Synchronisation", treatment = "P Practical", } @InProceedings{Walker:1995:MVB, author = "D. W. Walker", title = "An {MPI} version of the {BLACS}", crossref = "IEEE:1995:PSP", pages = "129--146", year = "1995", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Math. Sci. Sect., Oak Ridge Nat. Lab., TN, USA", classification = "C4140 (Linear algebra); C6110B (Software engineering techniques); C6150N (Distributed systems software)", conftitle = "Proceedings Scalable Parallel Libraries Conference", corpsource = "Math. Sci. Sect., Oak Ridge Nat. Lab., TN, USA", keywords = "Basic Linear Communication Subprograms; BLACS; blocking; Blocking; functionality; Functionality; linear algebra; message passing; message passing standard; Message passing standard; MPI; MPI communication modes; MPI Linear Algebra Communication Subprograms; nonblocking communication; Nonblocking communication; software libraries; standards; subroutines", sponsororg = "Mississippi State Univ.; NSF", thesaurus = "Linear algebra; Message passing; Software libraries; Standards; Subroutines", treatment = "P Practical", } @TechReport{Walker:1995:RBD, author = "David W. Walker and Steve W. 
Otto", title = "Redistribution of Block-Cyclic Data Distributions Using {MPI}", number = "ORNL/TM-12999", institution = inst-ORNL, address = inst-ORNL:adr, pages = "iii + 20", month = jun, year = "1995", bibdate = "Tue Jan 16 08:37:06 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.epm.ornl.gov/~walker/mpi/redistribution.ps.Z", } @InProceedings{Wang:1995:PPG, author = "Cho-Li Wang and V. K. Prasanna and Young Won Lim", title = "Parallelization of perceptual grouping on distributed memory machines", crossref = "Cantoni:1995:CCA", pages = "323--330", year = "1995", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Electr. Eng. Syst., Univ. of Southern California, Los Angeles, CA, USA", classification = "B6140C (Optical information, image and video signal processing); C4240C (Computational complexity); C4240P (Parallel programming and algorithm theory); C5220P (Parallel architecture); C5260B (Computer vision and image processing techniques); C5440 (Multiprocessing systems); C6110P (Parallel programming)", keywords = "16 Node Cray T3D; Architecture independent parallel algorithms; CM-5; Communication startup time; Communication time; Computation time; Distributed memory machines; High performance computing platforms; Line segment extraction; MPI message passing standard; Perceptual grouping; Processing nodes; Transmission rate", thesaurus = "Communication complexity; Computational complexity; Distributed memory systems; Edge detection; Feature extraction; Message passing; Parallel algorithms", } @Article{Wasniowski:1995:NAP, author = "R. A. 
Wasniowski", title = "Nonlinear adaptive prediction algorithm and its parallel implementation", journal = j-INFORMATICA, volume = "19", number = "3", pages = "371--377", month = sep, year = "1995", CODEN = "INFOFF", ISSN = "0350-5596", ISSN-L = "0350-5596", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "New Mexico Highlands Univ., Las Vegas, NM, USA", classification = "C1220 (Simulation, modelling and identification); C1240 (Adaptive system theory); C4240P (Parallel programming and algorithm theory); C6110P (Parallel programming); C6185 (Simulation techniques); C7400 (Engineering computing)", fjournal = "Informatica (Ljubljana, Slovenia)", keywords = "Computation times; Computationally-intensive engineering problems; Cost/performance ratio; Group method of data handling; Heterogeneous machines; Large parallel programs; Massively parallel computers; Nonlinear adaptive prediction algorithm; Parallel algorithm development; Parallel simulators; PVM; Software packages; Systems identification; Workstation networks", pubcountry = "Slovenia", thesaurus = "Adaptive estimation; Digital simulation; Engineering computing; Forecasting theory; Identification; Parallel algorithms", } @TechReport{Werner:1995:UMP, author = "J{\"o}rg Werner", title = "{{\"U}berblick zum Message-Passing-Interface Standard, MPI}. ({German}) [{Overview} of the {Message-Passing Interface Standard, MPI}]", type = "{Parlab-Mitteilungen}", number = "04/95", institution = "Technische Universit{\"a}t Chemnitz-Zwickau", address = "Chemnitz, Germany", pages = "35", year = "1995", bibdate = "Wed Aug 27 06:21:48 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, language = "German", } @InProceedings{West:1995:AVV, author = "J. E. West and M. M. Stephens and L. H. 
Turcotte", title = "Adaptation of volume visualization techniques to {MIMD} architectures using {MPI}", crossref = "IEEE:1995:PSP", pages = "147--156", year = "1995", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "DoD High Performance Comput. Center, US Army Eng. Waterways Exp. Station, Vicksburg, MS, USA", classification = "C4240P (Parallel programming and algorithm theory); C5440 (Multiprocessing systems); C6130B (Graphics techniques); C6150N (Distributed systems software); C7300 (Natural sciences computing)", conftitle = "Proceedings Scalable Parallel Libraries Conference", corpsource = "DoD High Performance Comput. Center, US Army Eng. Waterways Exp. Station, Vicksburg, MS, USA", keywords = "data visualisation; distributed memory parallel computers; Distributed memory parallel computers; distributed memory systems; divide and conquer methods; divide-and-conquer approach; Divide-and-conquer approach; high resolution volume datasets; High resolution volume datasets; interprocessor communication; Interprocessor communication; message passing; Message Passing Interface; MIMD architectures; MPI; nCUBE 2; NCUBE 2; parallel algorithm; Parallel algorithm; parallel algorithms; parallel architectures; parallel implementation; Parallel implementation; parallel machines; rendering (computer graphics); scene generation; Scene generation; scientific analysis; Scientific analysis; sequential algorithm; Sequential algorithm; standards; volume rendering method; Volume rendering method; volume visualization techniques; Volume visualization techniques", sponsororg = "Mississippi State Univ.; NSF", thesaurus = "Data visualisation; Distributed memory systems; Divide and conquer methods; Message passing; Parallel algorithms; Parallel architectures; Parallel machines; Rendering [computer graphics]; Standards", treatment = "A Application; P Practical; T Theoretical or Mathematical", } 
@Article{White:1995:PNP, author = "S. White and A. Alund and V. S. Sunderam", title = "Performance of the {NAS} Parallel Benchmarks on {PVM-Based} Networks", journal = j-J-PAR-DIST-COMP, volume = "26", number = "1", pages = "61--71", day = "1", month = apr, year = "1995", CODEN = "JPDCER", DOI = "https://doi.org/10.1006/jpdc.1995.1048", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Thu Mar 9 09:18:57 MST 2000", bibsource = "http://www.idealibrary.com/servlet/useragent?func=showAllIssues&curIssueID=jpdc; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.idealibrary.com/links/doi/10.1006/jpdc.1995.1048/production; http://www.idealibrary.com/links/doi/10.1006/jpdc.1995.1048/production/pdf", acknowledgement = ack-nhfb, classification = "C5440 (Multiprocessing systems); C5470 (Performance evaluation and testing); C6100 (Software techniques and systems); C7320 (Physics and chemistry computing)", corpsource = "Dept. of Math. and Comput. Sci., Emory Univ., Atlanta, GA, USA", fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", keywords = "aerodynamics; computational; computing; concurrent; Ethernet; FDDI networks; kernel benchmarks; NAS parallel benchmarks; parallel processing; performance evaluation; PVM system; PVM-based networks; software performance evaluation", treatment = "P Practical", } @InProceedings{Xu:1995:IPP, author = "H. Xu and T. W. 
Fisher", title = "Improving {PVM} Performance using {ATOMIC} User-Level Protocol", crossref = "Alnuweiri:1995:PHF", pages = "108--117", year = "1995", bibdate = "Thu Feb 29 17:59:11 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Yonezawa:1995:IED, author = "Naoki Yonezawa and Koichi Wada and Motoko Obata", title = "Implementation and evaluation of distributed shared data objects on a workstation cluster", crossref = "IEEE:1995:IPR", pages = "319--322", year = "1995", bibdate = "Fri May 24 09:58:00 MDT 1996", bibsource = "Compendex database; https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "IEEE catalog number 95CH35765.", abstract = "We are developing a system called KaReN to handle distributed shared data objects on workstations that are connected by Ethernet. The system supplied users a parallel programming environment with virtually shared data objects. The KaReN was developed using the message passing library PVM (Parallel Virtual Machine) to have good portability. To reduce overhead in maintaining data coherence, several methods are introduced. The request merging is introduced to reduce message traffic. The copy transfer messages are also clumped when possible. The weak consistency is another optimization for eliminating unnecessary coherence control message by allowing temporally inconsistent state. This paper presents the organization and the implementation of KaReN. 
Several applications have been executed for evaluation.", acknowledgement = ack-nhfb, affiliation = "Univ of Tsukuba", affiliationaddress = "Ibaraki, Jpn", classification = "722.3; 722.4; 723.1; 723.2; 921.5; C5620L (Local area networks); C6110P (Parallel programming); C6115 (Programming support); C6150N (Distributed systems software)", conference = "Proceedings of the 1995 IEEE Pacific RIM Conference on Communications, Computers, and Signal Processing", journalabr = "IEEE Pac RIM Conf Commun Comput Signal Process Proc", keywords = "Coherence control message; Computer networks; Computer software portability; Computer workstations; Copy transfer messages; Data coherence; Data handling; Data structures; Distributed computer systems; Distributed shared data objects; Ethernet; KaReN; Message passing library; Message traffic reduction; Object oriented programming; Optimization; Parallel programming environment; Parallel virtual machine; Parallel virtual machine (PVM); Portability; Subroutines; Virtually shared data objects; Weak consistency; Workstation cluster", meetingaddress = "Victoria, BC, Can", meetingdate = "May 17--19 1995", meetingdate2 = "05/17--19/95", sponsor = "IEEE", thesaurus = "Local area networks; Message passing; Network operating systems; Parallel programming; Programming environments; Software portability; Virtual machines; Workstations", } @Article{Yong:1995:SOM, author = "Dou Yong and Zhou Xingming", title = "Super-Object model: implementing shared memory programming mode on distributed memory multicomputers", journal = j-CHIN-J-COMPUTERS, volume = "18", number = "7", pages = "481--487", month = jul, year = "1995", CODEN = "JIXUDT", ISSN = "0254-4164", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Comput. Sci, Univ. 
of Defence Technol., Changsha, China", classification = "C5440 (Multiprocessing systems); C6110P (Parallel programming)", fjournal = "Chinese Journal of Computers = Chi suan chi hsueh pao", keywords = "Distributed memory multicomputers; Fortran 77; Global address; Implementation; Message passing primitives; Oak Ridge PVM; Performance; Prototype system; Run-time system; Shared memory parallel programming; Shared memory programming mode; Super-Object model; UNIX operating system", language = "Chinese", pubcountry = "China", thesaurus = "Distributed memory systems; Message passing; Parallel programming", } @Article{You:1995:EIM, author = "J. You and E. Pissaloux and W. P. Zhu and H. A. Cohen", title = "Efficient image matching: a hierarchical {Chamfer} matching scheme via distributed system", journal = j-REAL-TIME-IMAGING, volume = "1", number = "4", pages = "245--259", month = oct, year = "1995", CODEN = "REIMFQ", ISSN = "1077-2014", ISSN-L = "1077-2014", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Sch. of Comput. and Inf. Sci., South Australia Univ., SA, Australia", classification = "B6140C (Optical information, image and video signal processing); C5220P (Parallel architecture); C5260B (Computer vision and image processing techniques)", fjournal = "Real-Time Imaging", keywords = "Chamfer matching scheme; Distance transform; Distributed system; Dynamic thresholding; Edge points; Image matching; Parallel implementation; Parallel Virtual Machine; Pyramid", pubcountry = "UK", thesaurus = "Distributed processing; Image matching; Virtual machines", } @InProceedings{You:1995:PIM, author = "J. You and W. P. Zhu and E. Pissaloux and H. A. Cohen", title = "Parallel image matching on a distributed system", crossref = "Narashimhan:1995:IIF", pages = "870--873 (vol. 
2)", year = "1995", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Sch. of Comput. and Inf. Sci, Univ. of South Australia, The Levels, SA, Australia", classification = "C4240P (Parallel programming and algorithm theory); C5260B (Computer vision and image processing techniques); C6110P (Parallel programming)", keywords = "Distance transform; Distributed memory multicomputer; Distributed system; Heavily iterated computation; Image feature extraction; Image feature pixels; Low cost heterogeneous PVM network; Message-passing; Object recognition; Parallel image matching; Parallel virtual machine; Repeated memory access", thesaurus = "Feature extraction; Image matching; Message passing; Object recognition; Parallel algorithms", } @InProceedings{Zareski:1995:EPG, author = "D. Zareski and B. Wade and P. Hubbard and P. Shirley", title = "Efficient parallel global illumination using density estimation", crossref = "Uselton:1995:PRS", pages = "47--54, 104--105", year = "1995", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Program of Comput. 
Graphics, Cornell Univ., Ithaca, NY, USA", classification = "C4240P (Parallel programming and algorithm theory); C6110P (Parallel programming); C6130B (Graphics techniques); C6150N (Distributed systems software)", keywords = "Arbitrary nondiffuse surfaces; Density estimation; Diffuse inter-reflections; Efficient parallel global illumination; Energy transport; Gouraud-shaded elements; High geometric complexity environments; Interactive walk-throughs; Local area network; Master task; Meshing phase; Multicomputer parallel density estimation global illumination method; Multiple worker tasks; Parallel programs; Parallelization; Particle-tracing phase; PVM software package; Radiosity; Ray-traced images; Shared file system; Still frames; Workstations", thesaurus = "Brightness; Density; Lighting; Local area networks; Parallel algorithms; Parallel programming; Ray tracing; Realistic images; Rendering [computer graphics]; Workstations", } @InProceedings{Zelek:1995:DPP, author = "J. S. Zelek", title = "Dynamic path planning", crossref = "IEEE:1995:IIC", pages = "1285--1290 (vol. 2)", year = "1995", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Dept. of Electr. Eng., McGill Univ., Montreal, Que., Canada", classification = "C1230 (Artificial intelligence); C3390C (Mobile robots); C7420 (Control engineering computing)", keywords = "Dynamic path planning; Harmonic function; Message passing software package; Navigation; Nomad robot; Potential field; PVM; SPARC and SGI workstations", thesaurus = "Computerised control; Dynamics; Harmonics; Message passing; Mobile robots; Navigation; Path planning", } @InProceedings{Zhou:1995:FMP, author = "H. Zhou and A. 
Geist", title = "Faster Message Passing in {PVM}", crossref = "Alnuweiri:1995:PHF", pages = "67--73", year = "1995", bibdate = "Thu Feb 29 17:59:11 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Zhou:1995:RMR, author = "Honbo Zhou and Al Geist", title = "``Receiver Makes Right'' Data Conversion in {PVM}", crossref = "IEEE:1995:CPI", pages = "458--464", year = "1995", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "Compendex database; https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "Using a Receiver Makes it Right (RMR) data conversion technique in PVM significantly improves the message-passing performance in heterogeneous environments. The improvements are due to two factors: (1). RMR reduces the need for conversions in a heterogeneous environment; (2). At most each message is converted, only once compared to twice for XDR used in public version of PVM, and our conversion routines are streamlined and are several times faster than the XDR routines. The drawback to RMR is the potential need for a large number of conversion routines. We demonstrate that only a small number of routines are required because many vendors use the IEEE standard for data representation. Given this fact, RMR may emerge as a promising technique in distributed computing.", acknowledgement = ack-nhfb, affiliation = "Math. Sci. Sect., Oak Ridge Nat. Lab.", affiliationaddress = "Oak Ridge, TN, USA", classification = "722.1; 722.3; 722.4; 723.1; 723.2; C5440 (Multiprocessing systems); C6120 (File organisation); C7430 (Computer engineering)", conference = "Proceedings of the 1995 IEEE 14th Annual International Phoenix Conference on Computers and Communications", corpsource = "Math. Sci. Sect., Oak Ridge Nat. 
Lab., TN, USA", journalabr = "Conf Proc Int Phoenix Conf on Comput Commun", keywords = "Buffer storage; Computer software; Computer systems programming; conversion; Conversion routines; Data communication systems; data conversion; Data processing; data structures; Decoding; distributed computing; Distributed computing; Encoding (symbols); heterogeneous environments; Heterogeneous environments; Local area networks; machines; Message passing performance, Data conversion; message-; Message-passing performance; parallel machines; Parallel processing systems; parallel virtual machine; Parallel virtual machine; Parallel virtual machine (PVM); passing performance; PVM; Receiver makes it right (RMR) data conversion; routines; virtual", meetingaddress = "Scottsdale, AZ, USA", meetingdate = "Mar 28--31 1995", meetingdate2 = "03/28--31/95", thesaurus = "Data conversion; Data structures; Parallel machines; Virtual machines", treatment = "P Practical", } @Article{Zhu:1995:RTC, author = "Miaoliang Zhu and Chunming Wu and Youjun Zhang and Yi Jin and Jie Li", title = "A real-time and concurrent intelligent robotic system based on multi-agent architecture", journal = j-HIGH-TECH-LETT, volume = "5", number = "10", pages = "20--24", month = oct, year = "1995", CODEN = "GTONE8", ISSN = "1002-0470", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Artificial Intelligence Inst., Zhejiang Univ., Hangzhou, China", classification = "C1340D (Discrete control systems); C3390C (Mobile robots); C4220 (Automata theory); C5220P (Parallel architecture); C6150N (Distributed systems software); C6170 (Expert systems); C7420 (Control engineering computing); C7430 (Computer engineering)", fjournal = "High Technology Letters", keywords = "Automata; Autonomous mobile robots; Concurrent intelligent robotic system; Discrete event-finite state transformation model; Intelligent architecture; Multi-agent 
architecture; Multi-computer coherence environment; Parallel virtual machine; Pipeline scheduler; PVM; Real-time Multi-Agent System; RMAS; ROBIX; Simulation", language = "Chinese", pubcountry = "China", thesaurus = "Cooperative systems; Discrete event systems; Finite automata; Intelligent control; Mobile robots; Parallel processing; Pipeline processing; Real-time systems; Scheduling; Virtual machines", } @InProceedings{Zhuang:1995:PRS, author = "Xinglai Zhuang and Jianping Zhu", title = "Parallelizing a reservoir simulator using {MPI}", crossref = "IEEE:1995:PSP", pages = "165--174", year = "1995", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "NSF Eng. Res. Center for Comput. Field Simulations, Mississippi State Univ., MS, USA", classification = "C5440 (Multiprocessing systems); C6150N (Distributed systems software); C7340 (Geophysics computing); C7490 (Computing in other engineering fields)", conftitle = "Proceedings Scalable Parallel Libraries Conference", corpsource = "NSF Eng. Res. Center for Comput. 
Field Simulations, Mississippi State Univ., MS, USA", keywords = "customized communication library; Customized communication library; customized communication subroutines; Customized communication subroutines; digital simulation; geophysics computing; IBM SP1/SP2; Intel; Intel iPSC/860; message passing; Message Passing Interface; MPI; NX communication library; oil technology; parallel architecture; Parallel architecture; parallel code performance; Parallel code performance; parallel code portability; Parallel code portability; parallel computers; Parallel computers; parallel programming; performance; Performance; reservoir simulator; Reservoir simulator; scalability; Scalability; software libraries; standards; subroutines; workstation clusters; Workstation clusters", sponsororg = "Mississippi State Univ.; NSF", thesaurus = "Digital simulation; Geophysics computing; Message passing; Oil technology; Parallel programming; Software libraries; Standards; Subroutines", treatment = "A Application; P Practical", } @InProceedings{Alt:1996:PIA, author = "R. Alt and J. L. 
Lamotte", title = "Parallel integration across time of initial value problems using {PVM}", crossref = "Bode:1996:PVM", pages = "323--??", year = "1996", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C4130 (Interpolation and function approximation); C4170 (Differential equations); C6150N (Distributed systems software); C4240P (Parallel programming and algorithm theory); C7310 (Mathematics computing)", corpsource = "MASI and Institut Blaise Pascal, Paris, France", keywords = "approximation theory; collocation; Connection Machine CM5; differential; differential equations; distributed architectures; divided differences; equation; initial value; initial value problems; linear system; mathematics computing; method; nonlinear system; parallel; parallel algorithm; parallel algorithms; parallel integration; parallel machines; Picard iterations; polynomial approximation; problems; virtual machine", pubcountry = "Germany", treatment = "T Theoretical or Mathematical", } @Article{Anglano:1996:PMB, author = "C. Anglano and L.
Portinale", title = "Parallel Model-Based Diagnosis Using {PVM}", journal = j-LECT-NOTES-COMP-SCI, volume = "1156", pages = "331--334", year = "1996", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C6110P (Parallel programming); C6150N (Distributed systems software); C1160 (Combinatorial mathematics); C7440 (Civil and mechanical engineering computing)", corpsource = "Dipartimento di Inf., Universita' di Torino, Italy", fjournal = "Lecture Notes in Computer Science", keywords = "automobiles; car fault diagnosis; computing; fault diagnosis; identification; mechanical engineering; message passing; methods; MIMD message passing program; parallel; parallel backward reachability; parallel machines; parallel model-based diagnosis; parallel programs; parallel virtual machine; Petri net model; Petri nets; programming; reachability analysis; space; state; state-space; virtual machines", pubcountry = "Germany", treatment = "A Application; P Practical", } @Article{Anonymous:1996:BRMh, author = "Anonymous", title = "Book Review: {{\booktitle{MPI: the complete reference}}: By Marc Snir, Steve Otto, Steven Huss-Lederman, David Walker, and Jack Dongarra. MIT Press, Cambridge, MA. (1996). 336 pages.
\$27.50}", journal = j-COMPUT-MATH-APPL, volume = "31", number = "11", pages = "140--140", month = jun, year = "1996", CODEN = "CMAPDK", ISSN = "0898-1221 (print), 1873-7668 (electronic)", ISSN-L = "0898-1221", bibdate = "Wed Mar 1 21:48:23 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/computmathappl1990.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/0898122196873494", acknowledgement = ack-nhfb, fjournal = "Computers and Mathematics with Applications", journal-URL = "http://www.sciencedirect.com/science/journal/08981221", } @Misc{Anonymous:1996:IPP, author = "Anonymous", title = "An Introduction to {PVM} Programming", howpublished = "World-Wide Web", year = "1996", bibdate = "Tue Jan 16 08:17:36 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.epm.ornl.gov/pvm/intro.html", } @Misc{Anonymous:1996:PPA, author = "Anonymous", title = "Porting {PVM} Applications to the {Intel Paragon}", howpublished = "World-Wide Web", year = "1996", bibdate = "Tue Jan 16 08:25:19 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.ccs.ornl.gov/news/guide/xps_pvm.html", } @Misc{Anonymous:1996:RP, author = "Anonymous", title = "Research Program", howpublished = "World-Wide Web", year = "1996", bibdate = "Tue Jan 16 08:26:39 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.epm.ornl.gov/networking/", } @InProceedings{Arbenz:1996:MDS, author = "P. Arbenz and M. Billeter and P. G{\"u}ntert and P. Luginb{\"u}hl and M. Taufer and U. 
{von Matt}", title = "Molecular dynamics simulations on {Cray} clusters using the {SCIDDLE-PVM} environment", crossref = "Bode:1996:PVM", pages = "142--??", year = "1996", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "A0260 (Numerical approximation and analysis); A0270 (Computational techniques); A0320 (Classical mechanics of discrete systems: general mathematical aspects); A6120J (Computer simulation of static and dynamic liquid behaviour); A8715H (Biomolecular dynamics, molecular probes, molecular pattern recognition); C6110P (Parallel programming); C6150N (Distributed systems software); C7320 (Physics and chemistry computing); C7330 (Biology and medical computing)", corpsource = "Inst. of Sci. Comput., Swiss Federal Inst. of Technol., Zurich, Switzerland", keywords = "acids; asynchronous remote procedure calls; atom trajectory computation; biochemistry; biology computing; classical mechanics; client-server; communication; computer simulation; computing; Cray clusters; Cray computers; digital simulation; distributed algorithms; energy minimization; environment; minimisation; molecular biophysics; molecular dynamics method; molecular dynamics simulations; Newtonian equations of motion; nucleic; OPAL; paradigm; parallelization; physics; primitive; proteins; SCIDDLE-PVM; software package; virtual machines", pubcountry = "Germany", treatment = "P Practical", } @InProceedings{Arbenz:1996:SRP, author = "P. Arbenz and W. Gander and H. P. L{\"u}thi and U. {von Matt}", title = "{Sciddle} 4.0, or, remote procedure calls in {PVM}", crossref = "Liddell:1996:HPC", pages = "820--??", year = "1996", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C6150N (Distributed systems software)", corpsource = "Inst. of Sci. Comput., Swiss Federal Inst. 
of Technol., Zurich, Switzerland", keywords = "client process; client-server systems; data transfers; explicit; large data sets; message passing; overhead; parallel processing; parallelism; processes; remote procedure calls; Sciddle 4.0; server; tree structure", pubcountry = "Germany", treatment = "P Practical", } @Article{Attiya:1996:ERS, author = "H. Attiya", title = "Efficient and Robust Sharing of Memory in Message-Passing Systems", journal = j-LECT-NOTES-COMP-SCI, volume = "1151", pages = "56--??", month = "????", year = "1996", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Bachem:1996:STH, author = "A. Bachem and W. Hochst{\"a}ttler and M. Malich", title = "The Simulated Trading Heuristic for Solving Vehicle Routing Problems", journal = j-DISCRETE-APPL-MATH, volume = "65", number = "1--3", institution = "Mathematisches Institut, Universit{\"a}t zu K{\"o}ln", address = "Weyertal 86-90, 50931 K{\"o}ln, Germany", pages = "47--72", month = "????", year = "1996", CODEN = "DAMADU", ISSN = "0166-218X (print), 1872-6771 (electronic)", ISSN-L = "0166-218X", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; Techreports/ZPR.Koeln.bib", annote = "We present an improvement heuristic for vehicle routing problems. The heuristic finds complex customer interchanges to improve an initial solution. Our approach is modular, thus it is easily adjusted to different side constraints such as time windows, backhauls and a heterogeneous vehicle fleet. The algorithm is well suited for parallelization. We report on a parallel implementation of the Simulated Trading heuristic on a cluster of workstations using PVM.
The computational results obtained with sequential and parallel Simulated Trading show that our approach is competitive compared to all heuristics known to the authors by now.", crindex = "120k,29,zpr93-139.ps.gz", fjournal = "Discrete Applied Mathematics", xxnote = "Check final page number??", } @Article{Bader:1996:PPA, author = "David A. Bader and David R. Helman and Joseph J{\'a}J{\'a}", title = "Practical parallel algorithms for personalized communication and integer sorting", journal = j-ACM-J-EXP-ALGORITHMICS, volume = "1", pages = "3:1--3:??", month = "????", year = "1996", CODEN = "????", DOI = "https://doi.org/10.1145/235141.235148", ISSN = "1084-6654", bibdate = "Mon Oct 6 16:01:58 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "A fundamental challenge for parallel computing is to obtain high-level, architecture independent, algorithms which efficiently execute on general-purpose parallel machines. With the emergence of message passing standards such as MPI, it has become easier to design efficient and portable parallel algorithms by making use of these communication primitives. While existing primitives allow an assortment of collective communication routines, they do not handle an important communication event when most or all processors have non-uniformly sized personalized messages to exchange with each other. We focus in this paper on the h-relation personalized communication whose efficient implementation will allow high performance implementations of a large class of algorithms. While most previous h-relation algorithms use randomization, this paper presents a new deterministic approach for h-relation personalized communication with asymptotically optimal complexity for h>p$^2$. As an application, we present an efficient algorithm for stable integer sorting. 
The algorithms presented in this paper have been coded in Split-C and run on a variety of platforms, including the Thinking Machines CM-5, IBM SP-1 and SP-2, Cray Research T3D, Meiko Scientific CS-2, and the Intel Paragon. Our experimental results are consistent with the theoretical analysis and illustrate the scalability and efficiency of our algorithms across different platforms. In fact, they seem to outperform all similar algorithms known to the authors on these platforms.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Journal of Experimental Algorithmics", } @InProceedings{Barak:1996:PPM, author = "A. Barak and A. Braverman and I. Gilderman and O. Laden", title = "Performance of {PVM} with the {MOSIX} preemptive process migration scheme", crossref = "IEEE:1996:PSI", pages = "38--45", year = "1996", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C5620L (Local area networks); C6110P (Parallel programming); C6115 (Programming support); C6150J (Operating systems); C6150N (Distributed systems software)", corpsource = "Inst. of Comput. Sci., Hebrew Univ., Jerusalem, Israel", keywords = "allocation; assignments; communication bound benchmarks; computing; CPU benchmarks; high performance; idle workstation use; load-balancing; local area networks; MOSIX multicomputer operating system; MOSIX preemptive process migration scheme; multi-tasking applications; multiprogramming; network operating; operating systems (computers); parallel algorithms; parallel computing; parallel programming; process migration; process migration algorithms; programming environments; PVM performance; resource; software performance evaluation; static process assignment; system utilization; systems; task; transparent; UNIX; Unix; workstation networks; workstations", sponsororg = "IEEE Computer. 
Soc., Israel Chapter", treatment = "P Practical", } @InProceedings{Beguelin:1996:TMD, author = "A. Beguelin and V. Sunderam", title = "Tools for monitoring, debugging, and programming in {PVM}", crossref = "Bode:1996:PVM", pages = "7--13", year = "1996", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C6110P (Parallel programming); C6115 (Programming support); C6140D (High level languages); C6150G (Diagnostic, testing, debugging and evaluating systems)", corpsource = "Carnegie Mellon Univ., Pittsburgh, PA, USA", keywords = "authoring languages; buffered tracing; data visualisation; debugging tools; graphical console; Java language; JavaPVM; JPVM; languages; object-oriented; ParaGraph visualization tool; parallel programming; Parallel Virtual Machine; PGPVM; PIOUS; program debugging; program monitoring tools; program tracing; programming; PVaniM; PVM; PVMRPC; remote procedure style; sampling; software; system monitoring; TCL; techniques; tkPVM; tools; virtual machines; XPVM", pubcountry = "Germany", treatment = "P Practical", } @Article{Bernaschi:1996:RHP, author = "Massimo Bernaschi", title = "The requirements of a high performance implementation of {PVM}", journal = j-FUT-GEN-COMP-SYS, volume = "12", number = "1", pages = "3--11", month = may, year = "1996", CODEN = "FGSEVI", ISSN = "0167-739X (print), 1872-7115 (electronic)", ISSN-L = "0167-739X", bibdate = "Fri Jul 15 09:06:07 MDT 2005", bibsource = "ftp://ftp.ira.uka.de/bibliography/Parallel/pvm.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sciencedirect.com/science/journal/0167739X", acknowledgement = ack-nhfb, classification = "C5220P (Parallel architecture); C5440 (Multiprocessing systems); C5470 (Performance evaluation and testing); C6150N (Distributed systems software); C7430 (Computer engineering)", corpsource = "IBM Eur. Center for Sci. and Eng. 
Comput., Rome, Italy", fjournal = "Future Generation Computer Systems", journal-URL = "http://www.sciencedirect.com/science/journal/0167739X", keywords = "AIX operating system; de facto standard; distributed computing; high performance implementation; IBM parallel; machine; message; message passing; parallel machines; parallel virtual; passing; performance evaluation; POWER 2 architecture; programming interface; PVM; PVMe; run-time; SP2; system; system support; virtual machines", pubcountry = "Netherlands", remark = "Resource Management in Distributed Systems", treatment = "P Practical", } @InProceedings{Bhandarkar:1996:MPM, author = "M. A. Bhandarkar and L. V. Kale", title = "{MICE}: a prototype {MPI} implementation in {Converse} environment", crossref = "IEEE:1996:PSM", pages = "26--31", year = "1996", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C6110P (Parallel programming); C6115 (Programming support); C6150E (General utility programs); C6150N (Distributed systems software)", conftitle = "Proceedings. Second MPI Developer's Conference", corpsource = "Dept. of Comput. Sci., Illinois Univ., Urbana, IL, USA", keywords = "Abstract Device Interface; application program interfaces; communication; computations; Converse interoperable parallel programming environment; message managers; message passing; MICE; MPI modules; MPICH; multi-threaded MPI programs; open systems; parallel programming; programming environments; prototype MPI implementation; public-domain MPI implementation; PVM interoperation; thread objects; utility programs", sponsororg = "IEEE Comput. Soc. Tech. Committee on Distributed Process", treatment = "P Practical", } @InProceedings{Blaszczyk:1996:EPI, author = "A. Blaszczyk and C. 
Trinitis", title = "Experience with {PVM} in an industrial environment", crossref = "Bode:1996:PVM", pages = "174--??", year = "1996", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "B5110 (Electrostatics); B8300 (Power apparatus and electric machines); C6110P (Parallel programming); C6150N (Distributed systems software); C6155 (Computer communications software); C7410 (Electrical engineering computing); C7430 (Computer engineering)", corpsource = "Asea Brown Boveri AG, Heidelberg, Germany", keywords = "3D; Asea Brown Boveri; CAD; cluster; code; computer communications software; configuration; ease of use; efficiency; electric fields; electrical engineering computing; heterogeneous workstation clusters; high-voltage engineering; high-voltage equipment; industrial environment; multiprocessor machines; parallel code; parallel programming; Parallel Virtual Machine; parallelization; PVM communication software; reliability; simulation; virtual machines", pubcountry = "Germany", treatment = "A Application", } @InProceedings{Blum:1996:PIP, author = "J. M. Blum and T. M. Warschko and W. F. Tichy", title = "{PSPVM}: implementing {PVM} on a high-speed interconnect for workstation clusters", crossref = "Bode:1996:PVM", pages = "235--??", year = "1996", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C5440 (Multiprocessing systems); C5620L (Local area networks); C6150N (Distributed systems software); C6180 (User interfaces)", corpsource = "Dept. 
of Inf., Karlsruhe Univ., Germany", keywords = "25 mus; application speed-up; code compatibility; exchange; latency; local area networks; message; message passing; message transmission; multiprocessing; object-; parallel machines; ParaStation high-speed interconnect; ParaStation user interface; PSPVM; PVM package; systems; throughput; user interfaces; user level communication; user-level socket emulation; workstation clusters; workstations", pubcountry = "Germany", treatment = "P Practical", } @InProceedings{Bonnet:1996:UPW, author = "C. Bonnet", title = "Using {PVM} in wireless network environments", crossref = "Bode:1996:PVM", pages = "296--??", year = "1996", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "B6210L (Computer communications); C5470 (Performance evaluation and testing); C5620L (Local area networks); C5670 (Network performance)", corpsource = "Inst. Eurecom, Sophia Antipolis, France", keywords = "message passing model; networked environments; parallel machines; parallel virtual machine; performance evaluation; PVM; virtual machines; wireless LAN; wireless local area network; WLAN", pubcountry = "Germany", treatment = "P Practical", } @InProceedings{Bouchard:1996:FCS, author = "V. Bouchard and P. Cinquin and L. 
Desbat", title = "First {Compton} scatter correction in {SPECT} using {PVM}", crossref = "Grangeat:1996:PTI", pages = "109--111", year = "1996", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "A8710 (General, theoretical, and mathematical biophysics); A8760K (Nuclear medicine, emission tomography); A8770E (Patient diagnostic methods and instrumentation); B6140C (Optical information, image and video signal processing); B7510B (Radiation and radioactivity applications in biomedicine); C4240P (Parallel programming and algorithm theory); C5260B (Computer vision and image processing techniques); C7330 (Biology and medical computing)", corpsource = "Fac. de Med., TIMC-IMAG, La Tronche, France", keywords = "3D algorithms; Compton effect; computed tomography; diagnostic imaging; first Compton interaction; gamma-ray scattering; Klein-Nishina formula; medical; medical image; modeling; nuclear medicine; parallel algorithms; parallel virtual machine; physical; processing; registered scanner reconstruction; single photon emission; SPECT Compton scatter correction", pubcountry = "France", treatment = "T Theoretical or Mathematical", } @InProceedings{Brightwell:1996:DIM, author = "R. Brightwell and L. Shuler", title = "Design and implementation of {MPI} on {Puma} portals", crossref = "IEEE:1996:PSM", pages = "18--25", year = "1996", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C5220P (Parallel architecture); C5440 (Multiprocessing systems); C6110B (Software engineering techniques); C6150E (General utility programs); C6150J (Operating systems); C6150N (Distributed systems software)", conftitle = "Proceedings. Second MPI Developer's Conference", corpsource = "Massively Parallel Comput. Res. Lab., Sandia Nat. 
Labs., Albuquerque, NM, USA",
  keywords =     "application program interfaces; Argonne National
                 Laboratory/Mississippi State University Message
                 Passing Interface standard implementation; high
                 performance message passing environment; Intel
                 Paragon; Intel TeraFLOPS machine; massively parallel
                 computers; message passing; MPI; MPI collective
                 communication; MPI point-to-point communications;
                 MPI-2 one-sided communications; network operating
                 systems; operating systems (computers); parallel
                 architectures; parallel machines; Puma operating
                 system; Puma portals; software portability; SUNMOS;
                 utility programs",
  sponsororg =   "IEEE Comput. Soc. Tech. Committee on Distributed
                 Process",
  treatment =    "P Practical",
}

@InProceedings{Bubak:1996:MPP,
  author =       "M. Bubak and W. Funika and J. Moscinski",
  title =        "Monitoring of performance of {PVM} applications on
                 virtual network computer",
  crossref =     "Wasniewski:1996:APC",
  pages =        "147--156",
  year =         "1996",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C5440 (Multiprocessing systems); C6110P (Parallel
                 programming); C6150G (Diagnostic, testing, debugging
                 and evaluating systems); C6150N (Distributed systems
                 software)",
  corpsource =   "Inst. of Comput. Sci., AGH, Krakow, Poland",
  keywords =     "computer; data visualisation; metaformat; metrics;
                 monitoring; Pablo-based tool; parallel machines;
                 parallel programming; parallel programs; performance
                 monitoring; PVM applications; SDDF; software
                 performance evaluation; software tools; system;
                 Tape/PVM; toolkit; virtual machines; virtual network;
                 visualization",
  pubcountry =   "Germany",
  treatment =    "P Practical",
}

@InProceedings{Bubak:1996:PBP,
  author =       "M. Bubak and W. Funika and J. Moscinski and D.
Tasak", title = "Pablo-based performance monitoring tool for {PVM} applications", crossref = "Dongarra:1996:APC", pages = "69--78", year = "1996", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C6110P (Parallel programming); C6150G (Diagnostic, testing, debugging and evaluating systems)", corpsource = "Inst. of Comput. Sci., AGH, Krakow, Poland", keywords = "3-D molecular dynamics program; conjugate gradient benchmark; Pablo environment; ParaGraph functions; parallel programming; performance monitoring; PVM applications; software performance evaluation; system monitoring; trace file; XPVM", pubcountry = "Germany", treatment = "P Practical", }
%%% NOTE(review): entries Bubak:1996:PBP (preceding) and Bubak:1996:PPM
%%% (following) appear to describe the same paper: identical author
%%% list, crossref (Dongarra:1996:APC), year, and pages (69--78), with
%%% titles that differ only in capitalization.  Both keys are retained
%%% so that existing citations keep working; confirm before merging.
@InProceedings{Bubak:1996:PPM, author = "M. Bubak and W. Funika and J. Moscinski and D. Tasak", title = "{Pablo-Based} Performance Monitoring Tool for {PVM} Applications", crossref = "Dongarra:1996:APC", pages = "69--78", year = "1996", bibdate = "Wed Aug 14 09:02:28 MDT 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Cavenaghi:1996:UPS, author = "M. A. Cavenaghi and R. Spolon and J. E. M. Perea-Martins and S. G. Domingues and A. {Garcia Neto}", title = "Using {PVM} in the simulation of a hybrid dataflow architecture", crossref = "Bode:1996:PVM", pages = "343--??", year = "1996", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C4240P (Parallel programming and algorithm theory); C5220P (Parallel architecture); C5440 (Multiprocessing systems); C6185 (Simulation techniques); C7430 (Computer engineering)", corpsource = "Dept. of Comput.
Sci., Sao Paulo State Univ., Brazil", keywords = "data flow computing; digital simulation; hybrid dataflow architecture; interconnection network; machines; message passing; message passing environment; multiplexing; multiprocessor system; optical; optical interconnections; parallel architectures; parallel execution; sequential execution; simulator; system; uniprocessor; virtual; wavelength division; wavelength division multiplexing; WDM techniques", pubcountry = "Germany", treatment = "A Application; P Practical", } @Article{Charny:1996:MPV, author = "B. Charny", title = "Matrix partitioning on a virtual shared memory parallel machine", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "7", number = "4", pages = "343--355", month = apr, year = "1996", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/71.494629", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C4140 (Linear algebra); C4240P (Parallel programming and algorithm theory); C5220P (Parallel architecture)", corpsource = "Audre Inc., San Diego, CA, USA", fjournal = "IEEE Transactions on Parallel and Distributed Systems", journal-URL = "http://www.computer.org/tpds/archives.htm", keywords = "contention-; free partitionings; load-balanced; machines; matrix; matrix decomposition; memory contention; parallel; parallel algorithms; parallel machine; partitioning; performance issues; shared memory systems; virtual shared memory; virtual storage", treatment = "T Theoretical or Mathematical", } @Article{Chengqing:1996:WIP, author = "Ye Chengqing and Cui Zhenqian", title = "The ways of improving parallel computing efficiency in {PVM}", journal = j-MINI-MICRO-SYSTEMS, volume = "17", number = "4", pages = "12--16", month = apr, year = "1996", CODEN = "XWJXEH", ISSN = "1000-1220", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = 
"https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C5440 (Multiprocessing systems); C6185 (Simulation
                 techniques); C7430 (Computer engineering); C6150J
                 (Operating systems)",
  corpsource =   "State Key Lab. of CAD/CG, Zhejiang Univ., Hangzhou,
                 China",
  fjournal =     "Mini-Micro Systems",
  keywords =     "balancing algorithm; communication overhead; digital
                 simulation; distributed system environment; hosts;
                 load; local area network; message exchange; network
                 partitioning; parallel; parallel computing
                 efficiency; parallel machines; PVM; resource
                 allocation; strategy; virtual machine; virtual
                 machines",
  language =     "Chinese",
  pubcountry =   "China",
  treatment =    "P Practical",
}

@Article{Ciampolini:1996:EPM,
  author =       "A. Ciampolini and C. Stefanelli",
  title =        "Extending {PVM} to a massively parallel architecture",
  journal =      j-FUT-GEN-COMP-SYS,
  volume =       "12",
  number =       "1",
  pages =        "13--23",
  month =        may,
  year =         "1996",
  CODEN =        "FGSEVI",
  ISSN =         "0167-739X (print), 1872-7115 (electronic)",
  ISSN-L =       "0167-739X",
  bibdate =      "Fri Jul 15 09:06:07 MDT 2005",
  bibsource =    "ftp://ftp.ira.uka.de/bibliography/Parallel/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.sciencedirect.com/science/journal/0167739X",
  acknowledgement = ack-nhfb,
  classification = "C1250 (Pattern recognition); C5220P (Parallel
                 architecture); C5260B (Computer vision and image
                 processing techniques); C5440 (Multiprocessing
                 systems); C6115 (Programming support); C7430
                 (Computer engineering)",
  corpsource =   "Dipartimento di Elettronica, Inf.
e Sistemistica, Bologna Univ., Italy", fjournal = "Future Generation Computer Systems", journal-URL = "http://www.sciencedirect.com/science/journal/0167739X", keywords = "applications; architecture; computational vision application; computer vision; fine-grained parallel; heterogeneous computing; machines; massively parallel architecture; Meiko Computing Surface; multicomputer; parallel; parallel architectures; parallel machines; programming environment; programming environments; transputer technology; Unix workstations; virtual machines", pubcountry = "Netherlands", remark = "Resource Management in Distributed Systems", treatment = "A Application; P Practical", } @InProceedings{Clematis:1996:CEP, author = "A. Clematis and V. Gianuzzi", title = "{CPVM} --- extending {PVM} for consistent checkpointing", crossref = "IEEE:1996:PFE", pages = "67--76", year = "1996", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C6110P (Parallel programming); C6115 (Programming support); C6150N (Distributed systems software)", corpsource = "Istituto per la Matematica Applicata, CNR, Genova, Italy", keywords = "concurrency control; consistent checkpointing; CPVM; deadlocks; fault-tolerance; global checkpoint-restart algorithms; job-swapping; migration; nonblocking; parallel programming; Parallel Virtual Machine; PVM; software; software fault; software libraries; software library; software portability; software tools; termination; tolerance", treatment = "P Practical", } @InProceedings{Clemencon:1996:THM, author = "C. Clemencon and K. M. Decker and V. R. Deshpande and A. Endo and J. Fritscher and P. A. R. Lorenzo and N. Masuda and A. Muller and R. Ruhl and W. Sawyer and B. J. N. Wylie and F. 
Zimmermann", title = "Tools-supported {HPF} and {MPI} parallelization of the {NAS} parallel benchmarks", crossref = "IEEE:1996:FSS", pages = "309--318", year = "1996", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C6110P (Parallel programming); C6115 (Programming support); C6140D (High level languages); C6150C (Compilers, interpreters and other processors); C6150G (Diagnostic, testing, debugging and evaluating systems)", conftitle = "Proceedings of 6th Symposium on the Frontiers of Massively Parallel Computation (Frontiers '96)", corpsource = "Centro Svizzero di Calcolo Sci., Manno, Switzerland", keywords = "Annai tool; code development time; communication libraries; compilers; distributed memory systems; FORTRAN; High Performance Fortran; high-level language; message passing; Message Passing Interface; NAS parallel benchmarks; NEC Cenju-3 distributed-memory parallel processor; parallel benchmark kernels; parallel languages; parallel programming; performance; portable parallel applications; program compilers; program debugging; scalability; scientific applications; sequential languages; software libraries; software performance evaluation; software tools", sponsororg = "IEEE Comput. Soc.; NASA Goddard Space Flight Center; URSA/CESDIS", treatment = "P Practical", } @InProceedings{Clement:1996:NPM, author = "Mark J. Clement and Michael R. Steed and Phyllis E. Crandall", title = "Network Performance Modeling for {PVM} Clusters", crossref = "ACM:1996:SCP", pages = "??--??", year = "1996", bibdate = "Mon Mar 23 12:31:18 1998", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.supercomp.org/sc96/proceedings/SC96PROC/CLEMENT/INDEX.HTM", acknowledgement = ack-nhfb, } @Article{Conforti:1996:PIA, author = "D. Conforti and L. {de Luca} and L. Grandinetti and R. 
Musmanno", title = "A parallel implementation of automatic differentiation for partially separable functions using {PVM}", journal = j-PARALLEL-COMPUTING, volume = "22", number = "5", pages = "643--656", day = "8", month = aug, year = "1996", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Fri Aug 6 10:14:59 MDT 1999", bibsource = "Compendex database; http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_free/browse/browse.cgi?year=1996&volume=22&issue=5; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_sub/browse/browse.cgi?year=1996&volume=22&issue=5&aid=1065", acknowledgement = ack-nhfb, classification = "B0290M (Numerical integration and differentiation); C4160 (Numerical integration and differentiation); C6110P (Parallel programming); C6150N (Distributed systems software)", corpsource = "Dipartimento di Elettronica, Inf. e Sistemistica, Calabria Univ., Italy", fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", keywords = "automatic differentiation; differentiation; distributed memory; finite-difference approximation; multiprocessor system; parallel algorithms; parallel implementation; partially separable functions; PVM; substantial speed-up", pubcountry = "Netherlands", treatment = "T Theoretical or Mathematical", } @InProceedings{Corbett:1996:OMP, author = "P. Corbett and D. Feitelson and S. Fineberg and Yarsun Hsu and B. Nitzberg and J.-P. Prost and M. Snir and B. 
Traversat and Parkson Wong", title = "Overview of the {MPI-IO} parallel {I/O} interface", crossref = "Jain:1996:IOP", pages = "127--146", year = "1996", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C6110P (Parallel programming); C6120 (File organisation); C6150N (Distributed systems software); C6180 (User interfaces)", corpsource = "IBM Thomas J. Watson Res. Center, Yorktown Heights, NY, USA", keywords = "asynchronous I/O operations; collective interface; data structures; file data partitioning; global data structures; high-level interface; message passing; MPI-IO parallel I/O interface; parallel file systems; parallel machine; parallel programming; portable message passing parallel programs; process memories; storage devices; user interfaces", treatment = "A Application; P Practical", } @InProceedings{Cotronis:1996:ECP, author = "J. Y. Cotronis and E. Floros and N. Papazis", title = "Efficient composition of {PVM} programs", crossref = "Liddell:1996:HPC", pages = "919--??", year = "1996", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C4240P (Parallel programming and algorithm theory); C6110P (Parallel programming)", corpsource = "Dept. of Inf., Athens Univ., Greece", keywords = "communication; Distribution of Maximum; parallel programming; process algebra; PVM; PVM programs; terminal process; topologies; tree process communication", pubcountry = "Germany", treatment = "T Theoretical or Mathematical", } @InProceedings{Coulaud:1996:EIP, author = "O. Coulaud and E. 
Dillon",
  title =        "Early implementation of {Para++} with {MPI-2}",
  crossref =     "IEEE:1996:PSM",
  pages =        "95--101",
  year =         "1996",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C6150E (General utility programs); C6150N
                 (Distributed systems software)",
  conftitle =    "Proceedings. Second MPI Developer's Conference",
  corpsource =   "Inst. Nat. de Recherche en Inf. et Autom.,
                 Villers-les-Nancy, France",
  keywords =     "application program interfaces; C language; dynamic
                 process chapter; dynamic process management; early
                 implementation; inter-communicator operations;
                 internal implementation; LAM 6.0; message passing;
                 Message Passing Interface; MPI-2; Para++ 2.0; PVM;
                 software libraries",
  sponsororg =   "IEEE Comput. Soc. Tech. Committee on Distributed
                 Process",
  treatment =    "P Practical",
}

@InProceedings{Dantas:1996:ILB,
  author =       "M. A. R. Dantas and E. J. Zaluska",
  title =        "Improving load balancing in an {MPI} environment with
                 resource management",
  crossref =     "Liddell:1996:HPC",
  pages =        "959--960",
  year =         "1996",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C5620L (Local area networks); C6110P (Parallel
                 programming); C6115 (Programming support); C6150N
                 (Distributed systems software)",
  conftitle =    "High-Performance Computing and Networking.
                 International Conference and Exhibition HPCN Europe
                 1996",
  corpsource =   "Dept. of Electron. and Comput. Sci., Southampton
                 Univ., UK",
  keywords =     "load balancing; local area networks; message passing;
                 Message Passing Interface; MPI environment; parallel
                 programming; process migration; programming
                 environments; resource allocation; resource
                 management facility; workstation clusters;
                 workstations",
  treatment =    "P Practical",
}

@InProceedings{Demaine:1996:FCC,
  author =       "E.
Demaine",
  title =        "First class communication in {MPI}",
  crossref =     "IEEE:1996:PSM",
  pages =        "189--194",
  year =         "1996",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C6110P (Parallel programming); C6115 (Programming
                 support); C6140D (High level languages); C6150N
                 (Distributed systems software)",
  conftitle =    "Proceedings. Second MPI Developer's Conference",
  corpsource =   "Dept. of Comput. Sci., Waterloo Univ., Ont., Canada",
  keywords =     "application program interfaces; C; channel creation;
                 communication events; Concurrent ML;
                 concurrent-programming languages; dynamic process
                 creation; Fortran; higher-order concurrency; message
                 passing; Message Passing Interface; message-passing;
                 MPI; Occam; parallel languages; parallel programming;
                 run-time; software libraries; software standards;
                 standard; Standard ML; static model",
  sponsororg =   "IEEE Comput. Soc. Tech. Committee on Distributed
                 Process",
  treatment =    "P Practical",
}

@InProceedings{Deshpande:1996:MIBa,
  author =       "V. Deshpande and W. Sawyer and D. W. Walker",
  title =        "An {MPI} implementation of the {BLACS}",
  crossref =     "IEEE:1996:PSM",
  pages =        "195--198",
  year =         "1996",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C4140 (Linear algebra); C5220P (Parallel
                 architecture); C6110B (Software engineering
                 techniques); C6115 (Programming support); C6150E
                 (General utility programs); C6150N (Distributed
                 systems software)",
  conftitle =    "Proceedings. Second MPI Developer's Conference",
  corpsource =   "Swiss Center for Sci.
Comput., Manno, Switzerland",
  keywords =     "application program interfaces; Basic Linear Algebra
                 Communication Subprograms; BLACS; libraries; matrix
                 algebra; message passing; MPI BLACS implementation;
                 MPI functionality; MPI libraries; parallel
                 architectures; performance; software libraries;
                 software performance evaluation; utility programs",
  sponsororg =   "IEEE Comput. Soc. Tech. Committee on Distributed
                 Process",
  treatment =    "P Practical",
}

@InProceedings{Deshpande:1996:MIBb,
  author =       "V. Deshpande and W. Sawyer",
  title =        "An {MPI} implementation of the {BLACS}",
  crossref =     "IEEE:1996:ICH",
  pages =        "463--468",
  year =         "1996",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C4140 (Linear algebra); C5440 (Multiprocessing
                 systems); C6150N (Distributed systems software);
                 C7310 (Mathematics computing)",
  conftitle =    "Proceedings of 3rd International Conference on High
                 Performance Computing (HiPC)",
  corpsource =   "Software Technol. Group, Swiss Center for Sci.
                 Comput., Manno, Switzerland",
  keywords =     "Basic Linear Algebra Communication Subprograms;
                 benchmark; BLACS; factorization; linear algebra;
                 mathematics computing; message passing; Message
                 Passing Interface; MPI implementation; parallel
                 architectures; performance; ScaLAPACK library;
                 software libraries; software packages; software
                 performance evaluation",
  sponsororg =   "IEEE Comput. Soc.; IEEE Comput. Soc. Tech. Committee
                 on Parallel Process.; ACM SIGARCH",
  treatment =    "P Practical",
}

@InProceedings{Dinda:1996:PIA,
  author =       "P. A. Dinda and D. R. O'Hallaron",
  title =        "The performance impact of address relation caching",
  crossref =     "Szymanski:1996:LCR",
  pages =        "213--226",
  year =         "1996",
  bibdate =      "Sun Dec 22 10:19:23 MST 1996",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Sch. of Comput.
Sci., Carnegie Mellon Univ., Pittsburgh, PA, USA", classification = "C6110P (Parallel programming); C6150N (Distributed systems software)", keywords = "Address computation; Address relation caching; Cache; Critical path; Data transfer; Deposit model communication; Distributed programming; End-to-end latency; Fine grain analytic model; Memory bandwidth; Message passing; Parallel programming; Performance impact", thesaurus = "Cache storage; Distributed processing; Message passing; Parallel programming", } @InProceedings{DiNucci:1996:CDS, author = "D. C. DiNucci", title = "Cooperative Data Sharing: a layered approach to an architecture-independent {Message-Passing Interface}", crossref = "IEEE:1996:PSM", pages = "58--65", year = "1996", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C5620L (Local area networks); C6150E (General utility programs); C6150J (Operating systems); C6150N (Distributed systems software)", conftitle = "Proceedings. Second MPI Developer's Conference", corpsource = "NASA Ames Res. Center, Moffett Field, CA, USA", keywords = "application development; application program interfaces; architecture-independent message-passing interface; CDS1; CDS2; communication semantics; contiguous data; Cooperative Data Sharing System; local area networks; low-level portable interface; message passing; Message Passing Kernel project; MPI; network operating systems; one-sided communication; operating system kernels; queues; semantics; SGI Power Challenge Array; Solaris; Sun workstation network; utility programs; workstations", sponsororg = "IEEE Comput. Soc. Tech. Committee on Distributed Process", treatment = "P Practical", } @Article{Djordjevic:1996:ICI, author = "G. L. Djordjevic and M. K. 
Stojcev", title = "An interprocessor communication interface for message passing via shared memory modules-design and performances", journal = j-COMP-ART-INTELL, volume = "15", number = "1", pages = "1--34", month = "????", year = "1996", CODEN = "CARIDY", ISSN = "0232-0274", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C5150 (Other circuits for digital computers); C5250 (Microcomputer techniques); C5430 (Microcomputers); C5440 (Multiprocessing systems); C5470 (Performance evaluation and testing); C5610S (System buses)", corpsource = "Fac. of Electron. Eng., Nish, Yugoslavia", fjournal = "Computers and Artificial Intelligence = Vychislitel'nye mashiny i iskusstvennyi intellekt", keywords = "communication bandwidth; communication module; communication throughput; configuration flexibility; data transfer; fully connected n-side pyramid; heterogeneous processors; host computer accelerator; interprocessor communication interface; local memory; message latency; message passing; microcomputers; multi-microcomputer system; multiprocessor interconnection networks; performance evaluation; shared memory bus; shared memory modules; shared memory systems; simulation; single board computers; storage management chips; system buses; system efficiency; system operation; system topology; two-side accessible memory chips", treatment = "P Practical", } @Article{Dong:1996:SPL, author = "Li Dong and Li Xiaoming and Fang Binxing", title = "The study on the parallel library based on {MPI}", journal = j-MINI-MICRO-SYSTEMS, volume = "17", number = "12", pages = "17--19", year = "1996", CODEN = "XWJXEH", ISSN = "1000-1220", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C6110P (Parallel programming); C6115 (Programming support)", corpsource = "Harbin Inst. 
of Technol., China", fjournal = "Mini-Micro Systems", keywords = "MPI; parallel library; parallel programming; parallel programming environments; software libraries; workstation network", language = "Chinese", treatment = "P Practical", } @Article{Dongarra:1996:MPS, author = "Jack J. Dongarra and Steve W. Otto and Marc Snir and David Walker", title = "A message passing standard for {MPP} and workstations", journal = j-CACM, volume = "39", number = "7", pages = "84--90", month = jul, year = "1996", CODEN = "CACMA2", ISSN = "0001-0782 (print), 1557-7317 (electronic)", ISSN-L = "0001-0782", bibdate = "Mon Aug 26 07:42:43 MDT 1996", bibsource = "Compendex database; http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.acm.org/pubs/toc/Abstracts/cacm/234000.html", abstract = "The Message Passing Interface (MPI) is a portable message-passing standard that facilitates development of parallel applications and libraries. MPI has been developed over a 12-month period in 1993 to 1994 of intensive meetings involving more than 80 people from approximately 40 organizations, mainly from the U.S. and Europe. 
Programming in MPI is straightforward and similar to programming with other message-passing interfaces.", acknowledgement = ack-nhfb, affiliation = "Univ of Tennessee", affiliationaddress = "Knoxville, TN, USA", classification = "716.1; 722.2; 722.3; 722.4; 723.1; 902.2", fjournal = "Communications of the ACM", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J79", journalabr = "Commun ACM", keywords = "algorithms; Application programming interface; C (programming language); Communication library routines; Computer networks; Computer software; Computer systems programming; Computer workstations; Concurrency control; Concurrent programs; Data communication systems; design; FORTRAN (programming language); Interfaces (computer); languages; Massively parallel processing; Message passing interface; Message passing programs; Message passing standard; Networks of workstations; Parallel processing systems; Point to point communications; Program compilers; standardization; Standards; Subroutines", subject = "{\bf D.4.4}: Software, OPERATING SYSTEMS, Communications Management, Message sending. {\bf D.2.7}: Software, SOFTWARE ENGINEERING, Distribution and Maintenance, Portability. {\bf D.2.0}: Software, SOFTWARE ENGINEERING, General, Standards. {\bf D.2.2}: Software, SOFTWARE ENGINEERING, Tools and Techniques, Software libraries. {\bf D.3.2}: Software, PROGRAMMING LANGUAGES, Language Classifications, Concurrent, distributed, and parallel languages. {\bf D.1.3}: Software, PROGRAMMING TECHNIQUES, Concurrent Programming, Parallel programming.", } @InProceedings{Dongarra:1996:SRP, author = "J. J. Dongarra and T. Hey and E. 
Strohmaier", title = "Selected results from the {PARKBENCH} benchmark", crossref = "Bouge:1996:EPP", volume = "2", pages = "251--254", year = "1996", bibdate = "Sat Mar 22 15:39:54 MST 1997", bibsource = "https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classcodes = "C5220P (Parallel architecture); C5470 (Performance evaluation and testing); C6150G (Diagnostic, testing, debugging and evaluating systems)", conflocation = "Lyon, France; 26-29 Aug. 1996", conftitle = "Proceedings of European Conference on Parallel Processing EURO-PAR '96", corpsource = "Dept. of Comput. Sci., Tennessee Univ., Knoxville, TN, USA", keywords = "computer testing; evaluation; hierarchical; MPI; parallel architectures; parallel benchmarks; PARKBENCH benchmark; performance; PVM; suite", treatment = "P Practical", } @InProceedings{Ebner:1996:TFP, author = "R. Ebner and A. Pfaffinger", title = "Transformation of functional programs into data flow graphs implemented with {PVM}", crossref = "Bode:1996:PVM", pages = "251--??", year = "1996", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C4190 (Other numerical methods); C4210L (Formal languages and computational linguistics); C5620L (Local area networks); C6110P (Parallel programming); C6115 (Programming support); C6120 (File organisation); C6140D (High level languages); C6150C (Compilers, interpreters and other processors); C6150N (Distributed systems software)", corpsource = "Inst. fur Inf., Tech. Univ. 
Munchen, Germany", keywords = "algorithms; automatic coarse-grain program; C procedure generation; communication; compiler; compilers; computational linguistics; data flow; data flow graphs; data structures; distributed tree-like data structures; dynamic data; FASAN; FASAN schedulers; function node evaluation; functional; functional language; functional program transformation; functional programming; inherent parallelism; languages; local area networks; maximal; numerical analysis; parallel programming; parallelising; parallelization; processor scheduling; PVM library; recursive numerical; semantics; software libraries; stream flow semantics; structure; tree; workstation clusters; workstations; wrapper streams", pubcountry = "Germany", treatment = "P Practical", } @InProceedings{Fabero:1996:DLB, author = "J. C. Fabero and I. Martin and A. Bautista and S. Molina", title = "Dynamic load balancing in a heterogeneous environment under {PVM}", crossref = "IEEE:1996:PFE", pages = "414--419", year = "1996", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C4240P (Parallel programming and algorithm theory); C6110P (Parallel programming); C6115 (Programming support); C6150N (Distributed systems software)", corpsource = "Dept. de Inf. y Autom., Univ. 
Complutense de Madrid, Spain", keywords = "allocation; computational load; computational requirements; computer aided software engineering; dynamic load balancing; heterogeneous environment; heterogeneous workstations net; parallel algorithms; processor scheduling; programming environments; resource; virtual storage", treatment = "P Practical", } @Article{Fagg:1996:PIP, author = "Graham Fagg and Jack Dongarra", title = "{PVMPI}: An Integration of {PVM} and {MPI} Systems", journal = "Calculateurs Parall{\`e}les", volume = "8", number = "2", pages = "151--166", year = "1996", CODEN = "????", ISSN = "1260-3198", bibdate = "Tue Feb 26 10:10:44 2002", bibsource = "https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.netlib.org/utk/papers/pvmpi/paper.html; http://www.netlib.org/utk/papers/pvmpi/pvmpi.ps; http://www.netlib.org/utk/people/JackDongarra/pdf/pvmpi.pdf", acknowledgement = ack-nhfb, } @InProceedings{Fagg:1996:TGR, author = "G. E. Fagg and K. S. London and J. J. Dongarra", title = "Taskers and general resource managers: {PVM} supporting {DCE} process management", crossref = "Bode:1996:PVM", pages = "180--??", year = "1996", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C6150E (General utility programs); C6150N (Distributed systems software); C7430 (Computer engineering)", corpsource = "Dept. of Comput. 
Sci., Tennessee Univ., Knoxville, TN, USA", keywords = "allocation schemes; application program interfaces; DCE process; distributed algorithms; distributed computing environments; dynamic meta-computing environments; general resource managers; management; Message; message passing; MPI; MPIRUN systems; operations; Parallel Virtual Machine; Passing Interface; processor scheduling; PVM 3.4 release; PVM internal; PVMPI project; resource allocation; schedulers; standardised plug-in; taskers; user-controlled flexibility; virtual machines", pubcountry = "Germany", treatment = "P Practical", } @InProceedings{Fang:1996:SPP, author = "N. Fang and H. Burkhart", title = "Structured parallel programming using {MPI}", crossref = "Liddell:1996:HPC", pages = "840--847", year = "1996", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C6110P (Parallel programming); C6115 (Programming support)", conftitle = "High-Performance Computing and Networking. International Conference and Exhibition HPCN Europe 1996", corpsource = "Dept. of Inf., Basel Univ., Switzerland", keywords = "higher abstractions; higher-level functions; message passing; message passing interface; message-passing programs; parallel programming; portability; programmer-oriented abstractions; programming environment; programming environments; structured parallel programming; system-oriented level", treatment = "P Practical", } @InProceedings{Fineberg:1996:PPI, author = "S. A. Fineberg and P. Wong and B. Nitzberg and C. 
Kuszmaul", title = "{PMPIO-a} portable implementation of {MPI-IO}", crossref = "IEEE:1996:FSS", pages = "188--195", year = "1996", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C6110B (Software engineering techniques); C6110P (Parallel programming); C6150J (Operating systems); C6150N (Distributed systems software)", conftitle = "Proceedings of 6th Symposium on the Frontiers of Massively Parallel Computation (Frontiers '96)", corpsource = "Numerical Aerodynamic Simulation, NASA Ames Res. Center, Moffett Field, CA, USA", keywords = "Cray J90; IBM SP-2; input-output programs; Intel Paragon; message passing; message passing interface; MPI-IO; parallel programming; PMPIO; portable I/O interface; portable implementation; portable parallel Input/Output interface; portable parallel programming; SGI; software engineering; software portability; Sun shared memory workstations", sponsororg = "IEEE Comput. Soc.; NASA Goddard Space Flight Center; URSA/CESDIS", treatment = "P Practical", } @InProceedings{Foster:1996:CDT, author = "I. T. Foster and D. R. {Kohr, Jr.} and R. Krishnaiyer and A. Choudhary", title = "Communicating data-parallel tasks: an {MPI} library for {HPF}", crossref = "IEEE:1996:ICH", pages = "433--438", year = "1996", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C6110P (Parallel programming); C6140D (High level languages)", conftitle = "Proceedings of 3rd International Conference on High Performance Computing (HiPC)", corpsource = "Div. of Math. and Comput. Sci., Argonne Nat.
Lab., IL, USA", keywords = "data-parallel tasks; FORTRAN; High Performance Fortran; HPF; HPF compiler; MPI library; multiblock application; multidisciplinary simulations; parallel programming; performance; pipeline computations; software performance evaluation; synthetic communication benchmark; task parallelism", sponsororg = "IEEE Comput. Soc.; IEEE Comput. Soc. Tech. Committee on Parallel Process.; ACM SIGARCH", treatment = "P Practical", } @InProceedings{Foster:1996:DSB, author = "Ian Foster and David R. {Kohr, Jr.} and Rakesh Krishnaiyer and Alok Choudhary", title = "Double Standards: Bringing Task Parallelism to {HPF} Via the Message Passing Interface", crossref = "ACM:1996:SCP", pages = "??--??", year = "1996", bibdate = "Mon Mar 23 12:31:18 1998", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.supercomp.org/sc96/proceedings/SC96PROC/FOSTER2/INDEX.HTM", acknowledgement = ack-nhfb, } @InProceedings{Foster:1996:GCM, author = "I. Foster and C. Kesselman and M. Snir", title = "Generalized communicators in the {Message Passing Interface}", crossref = "IEEE:1996:PSM", pages = "42--49", year = "1996", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C6110J (Object-oriented programming); C6110P (Parallel programming); C6150E (General utility programs); C6150N (Distributed systems software)", conftitle = "Proceedings. Second MPI Developer's Conference", corpsource = "Div. of Math. and Comput. Sci., Argonne Nat. 
Lab., IL, USA", keywords = "application program interfaces; collective communication operations; dynamic endpoint creation; dynamically created threads; endpoint transfer; generalized communicator construct; generalized MPI communicator concept; message passing; Message Passing Interface; multiple communication endpoints; multiple threads; object-oriented programming; object-oriented applications; parallel programming; utility programs", sponsororg = "IEEE Comput. Soc. Tech. Committee on Distributed Process", treatment = "P Practical", } @InProceedings{Foster:1996:MCL, author = "I. T. Foster and D. R. {Kohr, Jr.} and R. Krishnaiyer", title = "{MPI} as a coordination layer for communicating {HPF} tasks", crossref = "IEEE:1996:PSM", pages = "68--78", year = "1996", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C6110B (Software engineering techniques); C6110P (Parallel programming); C6115 (Programming support); C6140D (High level languages); C6150E (General utility programs); C6150N (Distributed systems software)", conftitle = "Proceedings. Second MPI Developer's Conference", corpsource = "Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL, USA", keywords = "application kernel; application program interfaces; binding; communication interface semantics; communications microbenchmark; coordination library calls; data parallelism; data-parallel languages; distributed array; execution model; explicit message passing; FORTRAN; High Performance Fortran task communication; high-level operations; libraries; library; message passing; Message Passing Interface; MPI coordination layer; parallel languages; parallel program development; parallel programming; performance evaluation; prototype HPF/MPI library; sequential languages; software libraries; software performance evaluation; task parallelism; utility programs", sponsororg = "IEEE Comput. Soc. Tech.
Committee on Distributed Process", treatment = "P Practical", } @InProceedings{Foster:1996:MIW, author = "I. Foster and J. Geisler and S. Tuecke", title = "{MPI} on the {I-WAY}: a wide-area, multimethod implementation of the {Message Passing Interface}", crossref = "IEEE:1996:PSM", pages = "10--17", year = "1996", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C5620W (Other computer networks); C6110B (Software engineering techniques); C6115 (Programming support); C6130S (Data security); C6150E (General utility programs); C6150N (Distributed systems software)", conftitle = "Proceedings. Second MPI Developer's Conference", corpsource = "Argonne Nat. Lab., IL, USA", keywords = "application program interfaces; authentication; automatic configuration mechanisms; communication mechanisms; geographically distributed computing resources; geographically distributed database resources; geographically distributed graphics resources; geographically distributed networking; heterogeneous systems; high-speed wide-area networks; I-WAY distributed-computing experiment; message authentication; message passing; Message Passing Interface; MPICH; Nexus multithreaded runtime system; parallel programming; portable high-performance programming model; process creation; programming environments; software environment; software libraries; utility programs; wide area networks", sponsororg = "IEEE Comput. Soc. Tech. Committee on Distributed Process", treatment = "P Practical", } @InProceedings{Geist:1996:APP, author = "G. A.
Geist", title = "Advanced programming in {PVM}", crossref = "Bode:1996:PVM", pages = "1--??", year = "1996", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C6110P (Parallel programming); C6115 (Programming support); C6150G (Diagnostic, testing, debugging and evaluating systems); C6150N (Distributed systems software)", corpsource = "Oak Ridge Nat. Lab., TN, USA", keywords = "advanced programming; application performance; applications; CUMULVS; distributed computing applications; fault tolerance; interactive; JavaPVM; message passing; parallel computing; parallel programming; Parallel Virtual Machine; performance evaluation; plug-ins; program debugging; PVM; software; software fault tolerance; software packages; TkPVM; virtual machines", pubcountry = "Germany", treatment = "P Practical", } @InProceedings{Geist:1996:MEM, author = "A. Geist and W. Gropp and S. Huss-Lederman and A. Lumsdaine and E. Lusk and W. Saphir and T. Skjellum and M. Snir", title = "{MPI-2}: extending the {Message-Passing Interface}", crossref = "Bouge:1996:EPP", pages = "128--135", year = "1996", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C5220P (Parallel architecture); C5610 (Computer interfaces)", conftitle = "Proceedings of European Conference on Parallel Processing EURO-PAR '96", corpsource = "Oak Ridge Nat. Lab., TN, USA", keywords = "collective operations; computer interfaces; dynamic process management; extensions; external interfaces; language binding; message passing; Message Passing Interface; MPI; MPI-2; MPI-2 document; one-sided operations; real-time computing; standards", treatment = "P Practical", } @TechReport{Geist:1996:VDP, author = "G. A. 
Geist and James Kohn and Philip Papadopoulos", title = "Visualization, Debugging, and Performance in {PVM}", institution = inst-ORNL, address = inst-ORNL:adr, pages = "11", year = "1996", bibdate = "Tue Jan 16 08:22:10 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.epm.ornl.gov/~geist/CapeCod.ps", } @Article{Gennart:1996:CAG, author = "B. A. Gennart and J. {Tarraga Gimenez} and R. D. Hersch", title = "Computer-Assisted Generation of {PVM\slash C++} Programs Using {CAP}", journal = j-LECT-NOTES-COMP-SCI, volume = "1156", pages = "259--269", month = "????", year = "1996", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C6110F (Formal methods); C6110P (Parallel programming); C6115 (Programming support); C6140D (High level languages); C6150N (Distributed systems software)", corpsource = "Ecole Polytech. Federale de Lausanne, Switzerland", fjournal = "Lecture Notes in Computer Science", keywords = "algorithm parallelization; algorithms; automatic programming; C language; C++; CAP; communication library; computation description; Computer-Aided Parallelization; computer-assisted; computer-assisted C++ program generation; data transfer requirements; formal specification; language extension; machine; message exchange; message passing; MPMD program; object-oriented languages; ordering; parallel; parallel program writing; parallel programming; performance; processors; PVM program generation; sequential code; sequential operation; sequential operations; specification; synchronisation; synchronization; thread execution; thread mapping; threads", pubcountry = "Germany", treatment = "P Practical", } @InProceedings{Ghosh:1996:ELM, author = "K. Ghosh and S. 
Breit", title = "Evaluating the Limits of Message Passing via the Shared Attraction Memory on {CC-COMA} Machines: Experiences with {TCGMSG} and {PVM}", crossref = "ACM:1996:FCP", pages = "173--180", year = "1996", bibdate = "Wed Mar 18 12:33:18 MST 1998", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, annote = "Also known as ICS'96. Held as part of the Federated computing research conference (FCRC'96)", keywords = "ACM; architecture; computer; FCRC; ICS; SIGARCH; supercomputing", } @InProceedings{Gold:1996:UAL, author = "C. Gold and T. Schnekenburger", title = "Using the {ALDY} load distribution system for {PVM} applications", crossref = "Bode:1996:PVM", pages = "278--??", year = "1996", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C6110P (Parallel programming); C6115 (Programming support); C6150N (Distributed systems software)", corpsource = "Inst. fur Inf., Tech. Univ. Munchen, Germany", keywords = "ALDY adaptive load distribution system; ALDY function library; libraries; load distribution strategies; parallel application programming; parallel applications; parallel program processes; parallel programming; PVM applications; resource allocation; software", pubcountry = "Germany", treatment = "P Practical", } @InProceedings{Govindan:1996:OMP, author = "V. Govindan and Y. Park and X. Li and S. Crear and O. Johnson", title = "An overview of a {MPI} profiling environment for the {NEC Cenju-3}", crossref = "IEEE:1996:PSM", pages = "185--188", year = "1996", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C6115 (Programming support); C6120 (File organisation); C6150G (Diagnostic, testing, debugging and evaluating systems); C6150J (Operating systems); C6150N (Distributed systems software)", conftitle = "Proceedings. 
Second MPI Developer's Conference", corpsource = "High Performance Comput. Center, Houston Univ., TX, USA", keywords = "application program interface; application program interfaces; data visualisation; dynamic trace buffer management; message passing; Message Passing Interface; MPI applications; MPI profiling environment; MPP research prototype; NEC Cenju-3; NSF Grand Challenge Application Group; operating system; operating systems (computers); parallel machines; program diagnostics; software libraries; storage management; user-driven visualization; virtual memory; virtual storage; visualization tool", sponsororg = "IEEE Comput. Soc. Tech. Committee on Distributed Process", treatment = "P Practical", } @Article{Gropp:1996:HPM, author = "W. Gropp and E. Lusk", title = "A high-performance {MPI} implementation on a shared-memory vector supercomputer", journal = j-PARALLEL-COMPUTING, volume = "22", number = "11", pages = "1513--??", month = "????", year = "1996", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Wed Mar 18 12:33:29 MST 1998", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Gropp:1996:HPP, author = "William Gropp and Ewing Lusk and Nathan Doss and Anthony Skjellum", title = "High-performance, portable implementation of the {MPI} {Message Passing Interface Standard}", journal = j-PARALLEL-COMPUTING, volume = "22", number = "6", pages = "789--828", day = "20", month = sep, year = "1996", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Fri Aug 6 10:15:01 MDT 1999", bibsource = "Compendex database; http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_free/browse/browse.cgi?year=1996&volume=22&issue=6; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = 
"http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_sub/browse/browse.cgi?year=1996&volume=22&issue=6&aid=1075", acknowledgement = ack-nhfb, affiliation = "Argonne Natl Lab", affiliationaddress = "Argonne, IL, USA", classification = "722.2; 722.4; 723; 723.1; 723.2; 902.2; C6110B (Software engineering techniques); C6110P (Parallel programming); C6115 (Programming support); C6150N (Distributed systems software)", corpsource = "Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL, USA", fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", journalabr = "Parallel Comput", keywords = "applications; Computer programming; Computer software portability; Data communication systems; design goal; distribution; environments; free; future developments; high-performance portable implementation; Interfaces (computer); library writers; message passing; Message passing interface; MPI message; MPI-2; MPICH; parallel computer vendors; Parallel processing systems; parallel programming; Parallel programming environment; passing interface standard; portable parallel programming environment; programming; project management; software libraries; software performance evaluation; software portability; software standards; software tools; specialists; specification; standard library; Standards", treatment = "P Practical", } @InProceedings{Hachler:1996:IAC, author = "G. Hachler and H. Burkhart", title = "Implementing the {ALWAN} communication and data distribution library using {PVM}", crossref = "Bode:1996:PVM", pages = "243--250", year = "1996", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C6110B (Software engineering techniques); C6110P (Parallel programming); C6115 (Programming support); C6140D (High level languages); C6150N (Distributed systems software)", corpsource = "Dept. 
of Inf., Basel Univ., Switzerland", keywords = "ALWAN communication and data distribution; code generation; CRAY T3D; environment; IBM SP2; INTEL PARAGON; language programming; library; measurements; message passing; mixed-; parallel application programmability; parallel coordination language; parallel languages; parallel programming; performance; performance evaluation; performance portability; programming environments; PVM; reusability; software; software component reuse; software libraries; software portability", pubcountry = "Germany", treatment = "P Practical", } @Article{Haechler:1996:IAC, author = "G. Haechler and H. Burkhart", title = "Implementing the {ALWAN} Communication and Data Distribution Library Using {PVM}", journal = j-LECT-NOTES-COMP-SCI, volume = "1156", pages = "243--??", month = "????", year = "1996", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Heckathorn:1996:SSP, author = "H. Heckathorn and B. Popp and W. Smith and D. Conklin and D. A. Newman and F. 
Wieland", title = "{SSGM}: from serial to parallel processing using {PVM}", journal = j-PROC-SPIE, volume = "2741", pages = "267--277", month = "????", year = "1996", CODEN = "PSISDG", ISSN = "0277-786X (print), 1996-756X (electronic)", ISSN-L = "0277-786X", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C3240K (Image sensors); C6185 (Simulation techniques); C3360L (Aerospace control); C3375 (Military control systems); C4260 (Computational geometry); C5220P (Parallel architecture); C5260B (Computer vision and image processing techniques); C5440 (Multiprocessing systems); C6130B (Graphics techniques); C6150N (Distributed systems software); C6160S (Spatial and pictorial databases); C7460 (Aerospace engineering computing)", conflocation = "Orlando, FL, USA; 9-11 April 1996", conftitle = "Technologies for Synthetic Environments: Hardware-in-the-Loop Testing", corpsource = "Div. of Space Sci., Naval Res.
Lab., Washington, DC, USA", fjournal = "Proceedings of the SPIE --- The International Society for Optical Engineering", keywords = "aerospace computing; aerospace simulation; computational; computational speed requirements; data visualisation; databases; geometry; guidance; hardware-in-; heterogeneous computers; high-fidelity real-time distributed simulation; high-fidelity scene generation; image; infrared imaging; IR sensor testing; latency; message; message passing system; military computing; military systems; missile; missile defence simulation; model; optical tracking; optimistic; optimistic computing; parallel; parallel machines; parallel processing; parallel virtual machine programming environment; passing; physics-based distributed simulation; physics-based phenomenology models; problems; processing; programming environments; protocols; radar; radar imaging; realistic images; rendering (computer graphics); signatures; surveillance; synchronization; synergistic; synthetic scene generation; target RCS; target tracking; technologies; the-loop simulation; tracking; virtual machines; visual; visualisation", sponsororg = "SPIE", treatment = "P Practical", } @InProceedings{Hempel:1996:APT, author = "R. Hempel and F. Zimmermann", title = "On the automatic {PARMACS-to-MPI} transformation in application programs", crossref = "Liddell:1996:HPC", pages = "1033--1034", year = "1996", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C5610 (Computer interfaces); C6150E (General utility programs); C6150N (Distributed systems software); C6155 (Computer communications software)", conftitle = "High-Performance Computing and Networking. International Conference and Exhibition HPCN Europe 1996", corpsource = "German Nat. Res. Center for Inf. Technol., St. 
Augustin, Germany", keywords = "application program; application program interfaces; computer interfaces; message passing; message passing interface; PARMACS; translation tool", treatment = "P Practical", } @InProceedings{Hempel:1996:SMM, author = "R. Hempel", title = "The status of the {MPI} message-passing standard and its relation to {PVM}", crossref = "Bode:1996:PVM", pages = "14--21", year = "1996", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C6110B (Software engineering techniques); C6110P (Parallel programming); C6150N (Distributed systems software)", conftitle = "Parallel Virtual Machine --- EuroPVM '96. Third European PVM Conference. Proceedings", corpsource = "Computations and Commun. Res. Labs., NEC Europe Ltd., Sankt Augustin, Germany", keywords = "application program interfaces; de-facto standard; domain; HPFF; Interface Forum; message passing; Message-Passing; Message-Passing Interface Forum; MPI message-passing standard; MPI-1; MPI-2; parallel; parallel computing; parallel programming; Parallel Virtual Machine; PARMACS; portability interfaces; programming; public; public domain; PVM; software packages; software portability; software standards; virtual machines", pubcountry = "Germany", treatment = "P Practical", } @InProceedings{Hong:1996:RDM, author = "Chul-Eui Hong and Bum-Sik Lee and Gi-Won On and Dong-Hae Chi", title = "Replay for debugging {MPI} parallel programs", crossref = "IEEE:1996:PSM", pages = "156--160", year = "1996", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C6110P (Parallel programming); C6150G (Diagnostic, testing, debugging and evaluating systems); C6150N (Distributed systems software)", conftitle = "Proceedings. Second MPI Developer's Conference", corpsource = "Comput. Div., Electron. and Telecommun. Res. 
Inst., Taejeon, South Korea", keywords = "application program interfaces; bitonic-merge sort; blocking message passing events; communication errors; cyclic debugging; execution replay algorithm; hazards and race conditions; lexical analyzer; logical time stamping algorithm; merging; message passing; message race conditions; MPI parallel program debugging; MPI standard; nonblocking message passing events; nondeterministic characteristics; parallel programming; program debugging; reference execution; reproducible behavior; software libraries; sorting", sponsororg = "IEEE Comput. Soc. Tech. Committee on Distributed Process", treatment = "P Practical", } @Article{Huckle:1996:PIS, author = "T. Huckle", title = "{PVM}-Implementation of Sparse Approximate Inverse Preconditioners for Solving Large Sparse Linear Equations", journal = j-LECT-NOTES-COMP-SCI, volume = "1156", pages = "166--173", month = "????", year = "1996", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C4130 (Interpolation and function approximation); C4140 (Linear algebra); C6110P (Parallel programming); C7310 (Mathematics computing)", corpsource = "Inst. fur Inf., Tech. Univ. 
Munchen, Germany", fjournal = "Lecture Notes in Computer Science", keywords = "access; algorithms; black-box solver; compressed sparse column format; computing; fast; Gram--Schmidt process; householder matrices; iterative methods; iterative solution; large sparse linear equations; least squares approximations; least-; master-slave; mathematics; matrix columns; matrix inversion; matrix multiplication; model; nonsymmetric ill-conditioned matrix; normal equations; parallel; preconditioned conjugate gradient algorithm; preconditioners; PVM implementation; QR-decomposition; sparse approximate inverse; sparse matrices; squares problem; submatrices; unstructured; virtual machines", pubcountry = "Germany", treatment = "P Practical; T Theoretical or Mathematical", } @MastersThesis{Jones:1996:LLM, author = "Chris R. Jones", title = "Low latency {MPI} for {Meiko CS/2} and {ATM} clusters", type = "Thesis (M.A.)", school = "Department of Computer Science, University of California, Santa Barbara", address = "Santa Barbara, CA, USA", year = "1996", LCCN = "QA76.27.C2 S25", bibdate = "Fri Feb 04 17:35:04 2000", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Article{Ju:1996:SPT, author = "Jiubin Ju and Yong Wang", title = "Scheduling {PVM} Tasks", journal = j-OPER-SYS-REV, volume = "30", number = "3", pages = "22--31", month = jul, year = "1996", CODEN = "OSRED8", ISSN = "0163-5980 (print), 1943-586X (electronic)", ISSN-L = "0163-5980", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C4240P (Parallel programming and algorithm theory); C6110P (Parallel programming); C6150N (Distributed systems software)", corpsource = "Dept. of Comput. 
Sci., Jilin Univ., Changchun, China", fjournal = "Operating Systems Review", keywords = "dynamically produced subtasks; environment; idle workstations; job; parallel programming; pool tasks; processor scheduling; PVM task scheduling; resource utilization; response time; workstation cluster", treatment = "P Practical", } @InProceedings{Juhasz:1996:PIP, author = "Z. Juhasz and D. Crookes", title = "A {PVM} implementation of a portable parallel image processing library", crossref = "Bode:1996:PVM", pages = "188--??", year = "1996", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "B6140C (Optical information, image and video signal processing); B6150C (Communication switching); B6210L (Computer communications); C5260B (Computer vision and image processing techniques); C5620L (Local area networks); C6110B (Software engineering techniques); C6110P (Parallel programming); C6115 (Programming support); C6150N (Distributed systems software)", corpsource = "Dept. of Inf. Syst., Veszprem Univ., Hungary", keywords = "abstract communications layer; asynchronous transfer mode; ATM network-based workstation clusters; communication; Ethernet; extensibility; high-level transparent; image processing; image processing application development; layered; libraries; local area; message passing; message-passing environment; networks; parallel image processing library; parallel programming; Parallel Virtual Machine; parallelism; performance; portable; programming model; PVM implementation; software; software model; software portability; technologies; virtual machines", pubcountry = "Germany", treatment = "P Practical", } @InProceedings{Kafura:1996:CCC, author = "D. Kafura and L. 
Huang", title = "Collective communication and communicators in {mpi++}", crossref = "IEEE:1996:PSM", pages = "79--86", year = "1996", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C6120 (File organisation); C6140D (High level languages); C6150E (General utility programs); C6150N (Distributed systems software)", conftitle = "Proceedings. Second MPI Developer's Conference", corpsource = "Dept. of Comput. Sci., Virginia Polytech. Inst. and State Univ., Blacksburg, VA, USA", keywords = "abstract data types; application program interfaces; attribute caching; C language; C++ language binding; cache storage; class hierarchy; collective communication; collective communicators; collective service; contexts; data structures; groups; Intel Paragon; message passing; MPI; mpi++; mpi++ program; object-oriented languages; parallel algorithm; Sun Sparc workstation; utility programs", sponsororg = "IEEE Comput. Soc. Tech. Committee on Distributed Process", treatment = "P Practical", } @InProceedings{Kale:1996:PMD, author = "R. P. Kale and M. E. Fleharty and P. M. Alsing", title = "Parallel molecular dynamics visualization using {MPI} with {MPE} graphics", crossref = "IEEE:1996:PSM", pages = "104--110", year = "1996", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "A6120J (Computer simulation of static and dynamic liquid behaviour); C6110P (Parallel programming); C6115 (Programming support); C6130B (Graphics techniques); C6150E (General utility programs); C7320 (Physics and chemistry computing)", conftitle = "Proceedings. Second MPI Developer's Conference", corpsource = "Dept. of Chem. and Nucl. 
Eng., New Mexico Univ., Albuquerque, NM, USA", keywords = "application program interfaces; atomic interactions; boundary-value problems; data visualisation; digital simulation; force decomposition; graphics rendering; IBM SP1; IBM SP2; infinitely replicated confined region; irregular geometries; load balancing; message passing; Message Passing Interface; molecular dynamics method; MPE graphics; MPI Extensions; OpenGL graphics library; parallel molecular dynamics visualization; parallel programming; periodic boundary conditions; physics computing; portable algorithm; real-time 3D object manipulation; real-time systems; rendering (computer graphics); SGI Onyx high-end graphics computer; sockets; software libraries; software portability; workstation clusters; X-Windows calls", sponsororg = "IEEE Comput. Soc. Tech. Committee on Distributed Process", treatment = "P Practical", } @InProceedings{Katkere:1996:VWI, author = "A. Katkere and J. Schlenzig and R. Jain", title = "{VRML-based WWW} interface to {MPI} Video", crossref = "ACM:1996:SVR", pages = "25--31, 137", year = "1996", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C6130B (Graphics techniques); C6130M (Multimedia); C6140D (High level languages); C7210 (Information services and centres)", conftitle = "Proceedings of 1995 VRML Workshop", corpsource = "Visual Comput.
Lab., California Univ., San Diego, La Jolla, CA, USA", keywords = "hypermedia; hypermedia markups; interaction metaphor; interactive television; interactive video; Internet; motion information; MPI Video; multiple perspective video streams; on-the-fly updating; page description languages; query processing; simulation languages; standard; three dimensional objects; three dimensional scenes; video data; virtual reality; Virtual Reality Modeling Language; VRML; VRML specification; World Wide Web interface; WWW interface", sponsororg = "San Diego Supercomput. Center; ACM", treatment = "P Practical", } @InProceedings{Kermarrec:1996:PDS, author = "Y. Kermarrec and L. Pautet", title = "Programming Distributed Systems with Both {Ada} 95 and {PVM}", crossref = "Toussaint:1996:AES", pages = "206--216", year = "1996", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C5440 (Multiprocessing systems); C6110P (Parallel programming); C6140D (High level languages); C7430 (Computer engineering)", corpsource = "ENST de Bretagne, Brest, France", keywords = "Ada; Ada 95; annex; communication architecture; distributed; distributed system; distributed systems programming; facilities; features; GNAT; low level; parallel; parallel languages; parallel machines; Parallel Virtual Machine; programming; PVM; virtual machines", pubcountry = "Germany", treatment = "P Practical", } @InProceedings{Kohl:1996:PTF, author = "J. A. Kohl and G. A. 
Geist", title = "The {PVM} 3.4 Tracing Facility and {XPVM} 1.1", crossref = "El-Rewini:1996:PTN", volume = "1", pages = "290--299", year = "1996", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C5440 (Multiprocessing systems); C6110P (Parallel programming); C6150G (Diagnostic, testing, debugging and evaluating systems); C6150N (Distributed systems software)", corpsource = "Div. of Comput. Sci. and Math., Oak Ridge Nat. Lab., TN, USA", keywords = "buffering; diagnostics; evaluation; event mask; format; graphical user interfaces; heterogeneous environment; library; mechanism; message passing; on-the-fly adjustment; parallel; parallel programming; Parallel Virtual Machine; performance tuning; program; program compilers; program debugging; program execution histories; program monitoring; programming; PVM 3.4; PVM library; run-time; self-defining data; shared-memory multiprocessors; software libraries; software performance; trace; trace event definition; trace events; tracing facility; tracing tool; user-defined custom; virtual machines; workstation clusters; XPVM 1.1", sponsororg = "Univ. Hawaii; Univ. Hawaii College of Bus. Adm", treatment = "P Practical", } @InProceedings{Kormicki:1996:PLS, author = "M. Kormicki and A. Mahmood and B. S. 
Carlson", title = "Parallel logic simulation on a network of workstations using {PVM}", crossref = "IEEE:1996:EIS", pages = "2--9", year = "1996", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "B1130B (Computer-aided circuit analysis and design); B1265B (Logic circuits); C5210B (Computer-aided logic design); C5440 (Multiprocessing systems); C7410D (Electronic engineering computing)", corpsource = "Washington State Univ., Richland, WA, USA", keywords = "activity level; ATM; balance; CAD; circuit analysis computing; combinational circuits; driven logic simulation algorithm; Ethernet; gate evaluations; high performance; ISCAS; ISCAS combinational benchmark circuits; load; logic; logic testing; network of workstations; output event-; parallel logic simulation; parallel machines; parallel virtual machine; performance; PVM; random partitioning; semi-optimistic scheme; sequential benchmark circuits; sequential circuits; switched; virtual machines", sponsororg = "IEEE Comput. Soc. Tech. Committee on Comput. Architecture; IEEE Comput. Soc. Tech Committee on Distributed Process.; IEEE Comput. Soc. Dallas Chapter", treatment = "A Application; P Practical", } @InProceedings{Kotsis:1996:EEP, author = "G. Kotsis and F. Sukup", title = "Efficiency Evaluation of {PVM 2.X}, {PVM 3.X}, {P4}, {EXPRESS} and {LINDA} on a Workstation Cluster Using the {NAS} Parallel Benchmarks", crossref = "Zaky:1996:PDT", pages = "149--171", year = "1996", bibdate = "Wed Aug 14 09:02:28 MDT 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Krantz:1996:RFP, author = "A. T. Krantz and A. Zadroga and S. E. Chodrow and V. S. 
Sunderam", title = "An {RPC} facility for {PVM}", crossref = "Liddell:1996:HPC", pages = "798--??", year = "1996", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C6150N (Distributed systems software)", corpsource = "Dept. of Math. and Comput. Sci., Emory Univ., Atlanta, GA, USA", keywords = "adaptive parallelism; client-server; client-server systems; computing; concurrent computing; distributed applications; failure resilience; heterogeneous environments; message passing; message-; message-passing systems; parallel processing; parallel virtual machine; passing paradigm; processor scheduling; remote procedure call; remote procedure calls; user-transparent load balancing", pubcountry = "Germany", treatment = "T Theoretical or Mathematical", } @InProceedings{Krone:1996:ICF, author = "O. Krone and M. Aguilar and B. Hirsbrunner and V. Sunderam", title = "Integrating Coordination Features in {PVM}", crossref = "Ciancarini:1996:CLM", pages = "432--435", year = "1996", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C4240P (Parallel programming and algorithm theory); C6110P (Parallel programming); C6150N (Distributed systems software)", corpsource = "Inst. d'Inf., Fribourg Univ., Switzerland", keywords = "client/server; coordination; extended coordination; features; generative communication; message passing; parallel programming; parallel systems; programming; PVM", pubcountry = "Germany", treatment = "T Theoretical or Mathematical", } @Article{Lawton:1996:BHP, author = "J. V. Lawton and J. J. Brosnan and M. P. Doyle and S. D. O. Riordain and T. G. 
Reddin", title = "Building a high-performance message-passing system for {MEMORY CHANNEL} clusters", journal = j-DEC-TECH-J, volume = "8", number = "2", pages = "96--116", month = oct, year = "1996", CODEN = "DTJOEL", ISSN = "0898-901X", bibdate = "Thu Mar 20 18:15:43 MST 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.digital.com:80/DTJM08/DTJM08P8.PS", abstract = "The new MEMORY CHANNEL for PCI cluster interconnect technology developed by Digital (based on technology from Encore Computer Corporation) dramatically reduces the overhead involved in intermachine communication. Digital has designed a software system, the TruCluster MEMORY CHANNEL Software version 1.4 product, that provides fast user-level access to the MEMORY CHANNEL network and can be used to implement a form of distributed shared memory. Using this product, Digital has built a low-level message-passing system that reduces the communications latency in a MEMORY CHANNEL cluster to less than 10 microseconds. This system can, in turn, be used to easily build the communications libraries that programmers use to parallelize scientific codes.
Digital has demonstrated the successful use of this message-passing system by developing implementations of two of the most popular of these libraries, Parallel Virtual Machine (PVM) and Message Passing Interface (MPI).", acknowledgement = ack-nhfb, classification = "C5220P (Parallel architecture); C6120 (File organisation); C6150N (Distributed systems software)", fjournal = "Digital Technical Journal", keywords = "access; clusters; communications latency; communications libraries; Computer Corporation; distributed shared memory; Encore; high-performance message-passing system; intermachine communication; Machine; MEMORY CHANNEL; message passing; Message Passing Interface; Parallel Virtual; PCI cluster interconnect technology; scientific codes; software; storage management; system; TruCluster MEMORY CHANNEL Software; user-level", treatment = "P Practical", } @Article{Lee:1996:TSF, author = "Bu-Sung Lee and A. Heng and W. Cai and Tai-Ann Tan", title = "Task scheduling facility for {PVM}", journal = j-PARALLEL-PROCESS-LETT, volume = "6", number = "4", pages = "563--574", month = dec, year = "1996", CODEN = "PPLTEE", ISSN = "0129-6264 (print), 1793-642X (electronic)", bibdate = "Tue Oct 21 18:27:39 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C5440 (Multiprocessing systems); C6110B (Software engineering techniques); C6150N (Distributed systems software)", corpsource = "Sch. of Appl. Sci., Nanyang Technol. 
Univ., Singapore", fjournal = "Parallel Processing Letters", journal-URL = "http://www.worldscientific.com/loi/ppl", keywords = "centralized task scheduler; client server system; client-server systems; design issue; heterogeneous computer systems; library routines; load balancing; parallel machines; Parallel Virtual Machine; PVM; resource allocation; round-robin task allocation scheme; scheduling; software libraries; software portability; task scheduling; virtual machines; virtual metacomputer; workstations", pubcountry = "Singapore", treatment = "P Practical", } @InProceedings{Liang:1996:AEO, author = "Wen-Yew Liang and Chun-Ta King and Feipei Lai", title = "{Adsmith}: an efficient object-based distributed shared memory system on {PVM}", crossref = "Li:1996:SIS", pages = "??--??", year = "1996", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C5220P (Parallel architecture); C5440 (Multiprocessing systems); C5470 (Performance evaluation and testing); C6110J (Object-oriented programming)", corpsource = "Dept. of Comput. Sci. and Inf. Eng., Nat. Taiwan Univ., Taipei, Taiwan", keywords = "accesses; Adsmith; atomic operations; communication subsystem; consistency; distributed memory systems; distributed shared memory system; load/store-like memory accesses; memory; memory systems; nonblocking; object-oriented programming; parallel architectures; performance; performance evaluation; PVM; release memory; shared; shared objects", sponsororg = "Chinese Nat. Res. Center for Intelligent Comput. Syst.; IEEE Comput. Soc.; IEEE Comput. Soc. Tech. Committee on Parallel Process.; Steering Committee of the Chinese Nat. Hi-Tech Programme; Inf. Process. Soc. Japan; Chinese Comput. Federation; IEICE Inf. and Syst. Soc", treatment = "P Practical", } @InProceedings{Liu:1996:BMP, author = "L. T. Liu and D. E. Culler and C.
Yoshikawa", title = "Benchmarking message passing performance using {MPI}", crossref = "Reeves:1996:PIC", volume = "1", pages = "101--110", year = "1996", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C5220P (Parallel architecture); C5440 (Multiprocessing systems); C5470 (Performance evaluation and testing); C6150N (Distributed systems software)", conftitle = "Proceedings of 25th International Conference on Parallel Processing", corpsource = "Comput. Sci. Div., Berkeley Univ., CA, USA", keywords = "benchmarks; IBM SP2; Intel Paragon; message passing; message passing performance; microbenchmarks; MPI; parallel machines; performance evaluation; SGI Power Challenge", sponsororg = "Int. Assoc. Comput. and Commun.; Pennsylvania State Univ", treatment = "P Practical", } @InProceedings{Loos:1996:MPS, author = "T. Loos and R. Bramley", title = "{MPI} performance on the {SGI Power Challenge}", crossref = "IEEE:1996:PSM", pages = "203--206", year = "1996", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C5440 (Multiprocessing systems); C6150E (General utility programs); C6150N (Distributed systems software)", conftitle = "Proceedings. 
Second MPI Developer's Conference", keywords = "application program interfaces; collective inter-processor communication; communications efficiency; communications overhead; communications tests; cost function; double precision arrays; graph partitioning algorithm; memory copying; memory performance curves; memory tests; message passing; MPI performance; MPI performance curves; MPI standard; parallel algorithms; parallel computers; performance evaluation; point-to-point inter-processor communication; primitives; second level cache; SGI Power Challenge; shared memory multiprocessor; shared memory systems; software performance evaluation; synchronisation; synchronization; total message sizes; utility programs", sponsororg = "IEEE Comput. Soc. Tech. Committee on Distributed Process", treatment = "P Practical", } @InProceedings{Lu:1996:PIF, author = "E. J.-L. Lu and D. I. Okunbor", title = "Parallel implementation of {3D FMA} using {MPI}", crossref = "IEEE:1996:PSM", pages = "119--124", year = "1996", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "A0270 (Computational techniques); A0320 (Classical mechanics of discrete systems: general mathematical aspects); A0545 (Theory and models of chaotic systems); A9510C (Celestial mechanics); A9575P (Mathematical and computer techniques in astronomy); C4240C (Computational complexity); C4240P (Parallel programming and algorithm theory); C6110P (Parallel programming); C6150E (General utility programs); C6150N (Distributed systems software); C7320 (Physics and chemistry computing); C7330 (Biology and medical computing); C7350 (Astronomy and astrophysics computing)", conftitle = "Proceedings. Second MPI Developer's Conference", corpsource = "Dept. of Comput.
Sci., Missouri Univ., Rolla, MO, USA", keywords = "3D fast multipole algorithm; application program interfaces; astronomy computing; astrophysics; biochemistry; biology computing; biomolecular dynamics; biophysics; chaos; chaotic characteristics; chemistry computing; communication back-end; communication overhead; computational complexity; digital simulation; galactic system; load balancing; long-range force calculation; message passing; Message Passing Interface; MPI; N-body problems; N-body systems simulation; parallel algorithms; parallel implementation; partitioning technique; physics computing; portable scalable parallel library; resource allocation; time complexity", sponsororg = "IEEE Comput. Soc. Tech. Committee on Distributed Process", treatment = "P Practical; T Theoretical or Mathematical", } @InProceedings{Manis:1996:EPT, author = "G. Manis and C. Voliotis and P. Tsanakas and G. Papakonstantinou", title = "Enhancing {PVM} with threads in distributed programming", crossref = "Liddell:1996:HPC", pages = "1013--??", year = "1996", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C6110P (Parallel programming); C6150N (Distributed systems software)", corpsource = "Athens Nat. Tech. Univ., Greece", keywords = "distributed programming; environment; Orchid; parallel programming; platform; portable features; PVM; software; software portability; thread-oriented PVM; threads", pubcountry = "Germany", treatment = "G General Review; P Practical", } @InProceedings{Markus:1996:PEM, author = "S. Markus and S. B. Kim and K. Pantazopoulos and A. L. Ocken and E. N. Houstis and P. Wu and S. Weerawarana and D. 
Maharry", title = "Performance evaluation of {MPI} implementations and {MPI} based {Parallel ELLPACK} solvers", crossref = "IEEE:1996:PSM", pages = "162--169", year = "1996", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C4140 (Linear algebra); C4170 (Differential equations); C4185 (Finite element analysis); C6150N (Distributed systems software); C7310 (Mathematics computing)", conftitle = "Proceedings. Second MPI Developer's Conference", corpsource = "Dept. of Comput. Sci., Purdue Univ., West Lafayette, IN, USA", keywords = "application program interfaces; distributed memory architectures; domain decomposition; elliptic boundary value problems; elliptic equations; finite difference methods; finite element mesh generation; iterative solvers; ITPACK library; mathematics computing; mesh generation; mesh partitioning; message passing; message passing communication libraries; MIMD; MPI; Parallel ELLPACK; parallel mesh generator; partial differential equations; problem solving environment; PVM; second order elliptic partial differential equations; software libraries; software performance evaluation; sparse algebraic equations; sparse matrices; workstation clusters", sponsororg = "IEEE Comput. Soc. Tech. Committee on Distributed Process", treatment = "P Practical; T Theoretical or Mathematical", } @InProceedings{Martin:1996:WTW, author = "D. E. Martin and T. J. McBrayer and P. A. Wilsey", editor = "H. El-Rewini and B. D. 
Shriver", booktitle = "{Proceedings of the Twenty-Ninth Hawaii International Conference on System Sciences}", title = "{WARPED}: a time warp simulation kernel for analysis and application development", volume = "1", publisher = "????", address = "????", pages = "5--??", year = "1996", ISBN = "0-8186-7324-9", ISBN-13 = "978-0-8186-7324-5", LCCN = "????", bibdate = "Sun Apr 13 12:29:32 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "WARPED is a publicly-available time warp simulation kernel for experimentation and application development. The kernel defines a standard interface to the application developer and is designed to provide a highly configurable environment for the integration of time warp optimizations. It is written in C++, uses the MPI (Message Passing Interface) standard and shared memory for communication, and executes on a variety of platforms including a network of SUN workstations, a SUN SMP workstation, the IBM SP1/SP2 multiprocessors, the Intel Paragon and IBM-compatible PCs running Linux. WARPED is distributed with several applications and includes a sequential kernel implementation for comparative analysis. The kernel supports LP (logical process) clustering, various time warp algorithms and several optimizations that dynamically adjust simulation parameters.", acknowledgement = ack-nhfb, classcodes = "C6185 (Simulation techniques); C6115 (Programming support); C6150N (Distributed systems software); C6110P (Parallel programming)", conflocation = "Wailea, HI, USA; 3-6 Jan. 1996", conftitle = "Proceedings of HICSS-29: 29th Hawaii International Conference on System Sciences", corpsource = "Dept. 
of ECECS, Cincinnati, OH, USA", keywords = "analysis; application; application program interfaces; applications; C++; comparative; configurable environment; development; development systems; discrete event simulation; dynamic simulation; IBM compatible; IBM SP1/SP2 multiprocessors; Intel Paragon; Linux; logical process clustering; memory systems; message passing; Message Passing Interface; microcomputer; MPI standard; optimisation; optimizations; parallel algorithms; parameter adjustment; PCs; sequential kernel implementation; shared; shared memory; SUN SMP workstation; SUN workstation network; synchronisation; time warp; time warp simulation; time warp simulation kernel; WARPED", sponsororg = "Univ. Hawaii; Univ. Hawaii College of Bus. Adm", treatment = "P Practical", } @InProceedings{McCandless:1996:OOM, author = "B. C. McCandless and J. M. Squyres and A. Lumsdaine", title = "Object Oriented {MPI} ({OOMPI}): a class library for the {Message Passing Interface}", crossref = "IEEE:1996:PSM", pages = "87--94", year = "1996", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C6110F (Formal methods); C6110J (Object-oriented programming); C6110P (Parallel programming); C6115 (Programming support); C6140D (High level languages); C6150E (General utility programs)", conftitle = "Proceedings. Second MPI Developer's Conference", corpsource = "Dept. of Comput. Sci. and Eng., Notre Dame Univ., IN, USA", keywords = "application program interfaces; C language; C++ bindings; C++ class library; formal specification; generic specification; message passing; Message Passing Interface; object-oriented class library; object-oriented languages; Object-Oriented MPI; object-oriented programming; OOMPI; parallel programming; program description language; software libraries", sponsororg = "IEEE Comput. Soc. Tech.
Committee on Distributed Process", treatment = "P Practical", } @InProceedings{McDonald:1996:NNP, author = "K. McDonald", title = "The {NAG Numerical PVM Library}", crossref = "Dongarra:1996:APC", pages = "419--428", year = "1996", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C4100 (Numerical analysis); C4240P (Parallel programming and algorithm theory); C5220P (Parallel architecture); C6110P (Parallel programming); C6115 (Programming support); C6150N (Distributed systems software)", corpsource = "Numerical Algorithms Group Ltd., Oxford, UK", keywords = "analysis; distributed memory systems; distributed-memory; efficient software; general-purpose numerical library; machines; message passing; NAG Fortran 77 Library; NAG Numerical PVM Library; numerical; Numerical Algorithms Group; parallel algorithms; parallel programming; parallel software; portable; public-domain message-passing; ScaLAPACK project; scalar computers; shared-memory computers; software; software libraries; software portability; state-of-the-art; systems; vector computers", pubcountry = "Germany", treatment = "P Practical", } @InProceedings{McMahon:1996:EEE, author = "T. P. McMahon and A. Skjellum", title = "{eMPI\slash eMPICH}: embedding {MPI}", crossref = "IEEE:1996:PSM", pages = "180--184", year = "1996", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C6110B (Software engineering techniques); C6115 (Programming support); C6150N (Distributed systems software)", conftitle = "Proceedings. Second MPI Developer's Conference", corpsource = "Dept. of Comput. 
Sci., Mississippi State Univ., MS, USA", keywords = "application program interface; application program interfaces; bottom-up design; design paradigms; embeddable libraries; embeddable MPI versions; eMPI; eMPICH; memory-constrained systems; message passing; real-time systems; software libraries; systems analysis; top-down design", sponsororg = "IEEE Comput. Soc. Tech. Committee on Distributed Process", treatment = "P Practical", } @InProceedings{Menden:1996:PPP, author = "J. Menden and G. Stellner", title = "Proving properties of {PVM} applications --- a case study with {CoCheck}", crossref = "Bode:1996:PVM", pages = "134--??", year = "1996", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C4240P (Parallel programming and algorithm theory); C6110F (Formal methods); C6110P (Parallel programming); C6150G (Diagnostic, testing, debugging and evaluating systems); C6150N (Distributed systems software); C7430 (Computer engineering)", corpsource = "Inst. fur Inf., Tech. Univ. Munchen, Germany", keywords = "case study; checkpoint; CoCheck; creation; distributed algorithms; formal method; machines; parallel applications; parallel programming; Parallel Virtual Machine; program; programming theory; properties; proving; PVM applications; systems software; verification; virtual; workstation clusters", pubcountry = "Germany", treatment = "P Practical; T Theoretical or Mathematical", } @Article{Miei:1996:IER, author = "T. Miei and N. 
Takahashi", title = "Implementation and evaluation of a replay-based debugger for {PVM} programs", journal = j-TRANS-INFO-PROCESSING-SOC-JAPAN, volume = "37", number = "7", pages = "1308--1319", month = jul, year = "1996", CODEN = "JSGRD5", ISSN = "0387-5806", ISSN-L = "0387-5806", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C6110P (Parallel programming); C6115 (Programming support); C6150G (Diagnostic, testing, debugging and evaluating systems); C6150N (Distributed systems software)", fjournal = "Transactions of the Information Processing Society of Japan", keywords = "code; dbxR; demand-driven replay method; dynamic execution sequences; message passing; message passing communications; message passing library; nondeterministic execution behavior; parallel programming; parallel programs; program debugging; program debugging evaluation; PVM programs; replay-based debugger; shared memory systems; shared-memory parallel programs; software performance evaluation; static source", language = "Japanese", pubcountry = "Japan", treatment = "P Practical", } @Article{Miguel:1996:APN, author = "Jose Miguel and Agustin Arruabarrena and Ramon Beivide and Jose Angel Gregorio", title = "Assessing the performance of the new {IBM SP2} communication subsystem", journal = j-IEEE-PAR-DIST-TECH, volume = "4", number = "4", pages = "12--22", month = "Winter", year = "1996", CODEN = "IPDTEX", DOI = "https://doi.org/10.1109/88.544433", ISSN = "1063-6552 (print), 1558-1861 (electronic)", ISSN-L = "1063-6552", bibdate = "Fri Apr 11 07:24:28 MDT 1997", bibsource = "Compendex database; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Universidad del Pais Vasco", affiliationaddress = "Spain", classification = "716.1; 721.1; 722.2; 722.4; 723; 912.3; C5440 (Multiprocessing systems); C5470 (Performance evaluation and testing); C6150N (Distributed 
systems software)", corpsource = "Dept. of Comput. Archit. and Technol., Univ. del Pais Vasco, San Sebastian, Spain", fjournal = "IEEE parallel and distributed technology: systems and applications", journalabr = "IEEE Parallel Distrib Technol", keywords = "Bandwidth; basic; collective communication; Communication channels (information theory); communication tests; Computer software; Computer testing; computers; distributed memory systems; evaluation; execution; execution time; Execution times; fault tolerant computing; high performance switch; IBM; IBM SP2 communication subsystem; Interconnection networks; interface adapters; Interfaces (computer); latency; Memory latency; message; message passing; Message passing interface (MPI); Microprocessor chips; MPI message passing library; parallel applications; parallel computer; parallel machines; Parallel processing systems; Parallel virtual machine (pvm); performance; performance assessment; performance indicators; real applications; reliability; SP2; Switching; Synchronization; Systems analysis; tests; throughput; times", treatment = "P Practical", } @InProceedings{Mo:1996:IOP, author = "J. Mo and F. Romelfanger and R. J. Hanisch and D. Redding and S. Sirlin and A. Boden", title = "Implementation of an optical prescription retrieval code using {PVM} (parallel virtual machine) in a mixed architecture network", crossref = "Jacoby:1996:ADA", pages = "100--103", year = "1996", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "A9575P (Mathematical and computer techniques in astronomy); C5440 (Multiprocessing systems); C6150N (Distributed systems software); C7350 (Astronomy and astrophysics computing); C7430 (Computer engineering)", corpsource = "Space Telescope Sci. 
Inst., Baltimore, MD, USA", keywords = "astronomy computing; machine; mixed architecture network; optical prescription retrieval code; parallel; parallel computing application; parallel machines; parallel virtual; performance comparisons; programming; PVM software system; virtual machines", treatment = "X Experimental", } @InProceedings{Muller:1996:CDI, author = "A. Muller and R. Ruhl", title = "Communication-buffers for data-parallel, irregular computations", crossref = "Szymanski:1996:LCR", pages = "295--298", year = "1996", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Centro Svizzero di Calcolo Sci., Eidgenossische Tech. Hochschule, Manno, Switzerland", classification = "C5440 (Multiprocessing systems); C6110P (Parallel programming); C6120 (File organisation); C6140D (High level languages); C6150C (Compilers, interpreters and other processors)", keywords = "Buffer organizations; Common user interface; Communication buffers; Compiler generated parallel programs; Critical code segments; Data parallel irregular computations; Data-parallel irregular computations; Distributed data; Distributed memory parallel processors; Distributed programs; High Performance Fortran compiler; HPF extensions; Low level machine interface; Message Passing Interface; Parallelization Support Tool; Performance monitor; Portable integrated tool environment Annai; PST HPF extensions; Run time preprocessing; Source level debugger; Unstructured computations", thesaurus = "Buffer storage; Distributed memory systems; FORTRAN; Parallel languages; Parallel programming; Parallelising compilers", } @Article{Nagel:1996:VVA, author = "W. E. Nagel and A. Arnold and M. Weber and H. C. Hoppe and K. 
Solchenbach", title = "{VAMPIR}: Visualization and Analysis of {MPI} Resources", journal = j-SUPERCOMPUTER, volume = "12", number = "1", pages = "69--80", month = jan, year = "1996", CODEN = "SPCOEL", ISSN = "0168-7875", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Central Inst. for Appl. Math., Res. Centre Julich, Germany", classification = "C6110P (Parallel programming); C6110S (Software metrics); C6150N (Distributed systems software); C6155 (Computer communications software)", corpsource = "Central Inst. for Appl. Math., Res. Centre Julich, Germany", fjournal = "Supercomputer", keywords = "activity chart; Activity chart; analysis; Analysis; animation mode; Animation mode; computer interfaces; flexible filter operation; Flexible filter operation; information display reduction; Information display reduction; message passing; message passing interface; Message passing interface; message passing standard; Message passing standard; MPI; MPI resource; parallel programming; Parallel programming; PARvis; performance bottleneck location; Performance bottleneck location; software performance analysis; Software performance analysis; software performance evaluation; state diagram; State diagram; statistics; Statistics; time-line displays; Time-line displays; tracing; Tracing; VAMPIR; visualization; Visualization; zooming; Zooming", pubcountry = "Netherlands", thesaurus = "Computer interfaces; Message passing; Parallel programming; Software performance evaluation", treatment = "P Practical", } @InProceedings{NicCanna:1996:LGS, author = "C. {Nic Canna} and C. J.
Bean", title = "Larger grids and shorter wall-clock times on a parallel virtual machine ({PVM}) --- an example using a finite difference wave simulation algorithm", crossref = "Abrahart:1996:GIC", volume = "2", pages = "2--??", year = "1996", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C4170 (Differential equations); C6150N (Distributed systems software); C6185 (Simulation techniques); C7340 (Geophysics computing); C7430 (Computer engineering)", corpsource = "Dept. of Geol., Univ. Coll. Dublin, Ireland", keywords = "acoustic wave equation; acoustic waves; algorithm; array sizes; computer modelling; digital simulation; Earth; finite difference; finite difference solution; finite difference wave simulation; geologically realistic; geophysics computing; machines; message passing; methods; parallel; parallel virtual machine; PVM message passing library; sciences; seismic wave; seismology; simulation method; virtual machines; virtually parallel machine; wall clock times; wave equations", pubcountry = "UK", treatment = "P Practical", } @InProceedings{Oberhuber:1996:MNP, author = "M. Oberhuber", title = "Managing nondeterminism in {PVM} programs", crossref = "Bode:1996:PVM", pages = "347--??", year = "1996", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C4240P (Parallel programming and algorithm theory); C6110P (Parallel programming); C6115 (Programming support); C6150G (Diagnostic, testing, debugging and evaluating systems)", corpsource = "Inst. fur Inf., Tech. Univ. 
Munchen, Germany",
  keywords =     "interprocess communication; nondeterminism; parallel machines; parallel programming; parallel programs; program debugging; program testing; PVM programs; TOOLSET environment; virtual machines",
  pubcountry =   "Germany",
  treatment =    "T Theoretical or Mathematical",
}

@InProceedings{Ogawa:1996:OOM,
  author =       "Hirotaka Ogawa and Satoshi Matsuoka",
  title =        "{OMPI}: Optimizing {MPI} Programs Using Partial Evaluation",
  crossref =     "ACM:1996:SCP",
  pages =        "??--??",
  year =         "1996",
  bibdate =      "Mon Mar 23 12:31:18 1998",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.supercomp.org/sc96/proceedings/SC96PROC/OGAWA/INDEX.HTM",
  acknowledgement = ack-nhfb,
}

@InProceedings{Papakostas:1996:PPP,
  author =       "N. Papakostas and G. Papakonstantinou and P. Tsanakas",
  title =        "{PPARDB\slash PVM}: a portable {PVM} based parallel database management system",
  crossref =     "Boszormenyi:1996:PCT",
  pages =        "??--??",
  year =         "1996",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C6110B (Software engineering techniques); C6160B (Distributed databases); C6110P (Parallel programming); C6160D (Relational databases); C7430 (Computer engineering)",
  corpsource =   "Dept. of Electr. and Comput. Eng., Nat. Tech. Univ.
of Athens, Greece",
  keywords =     "architecture; computational model; crowd; database processing elements; databases; distributed databases; heterogeneous workstation; horizontal; layered; multicasting; network; one master/multiple slaves; operating system dependencies; operator parallelism; parallel database management system; parallel programming; partitioning; portable; portable PVM based parallel database management; PPARDB/PVM; process synchronisation; relation tuples; relational; relational parallel database management system; relationship; scientific programming; shared nothing; software portability; system; tasks; transputer network; virtual machines; virtual parallel computer; workstations",
  pubcountry =   "Germany",
  treatment =    "P Practical",
}

@Article{Papakostas:1996:PSP,
  author =       "N. Papakostas and G. Papakonstantinou and P. Tsanakas",
  title =        "{PPARDB\slash PVM}: a Portable {PVM} Based Parallel Database Management System",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1127",
  pages =        "219--??",
  month =        "????",
  year =         "1996",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}

@InProceedings{Papakostas:1996:UPI,
  author =       "N. Papakostas and G. Papakonstantinou and P. Tsanakas",
  title =        "Using {PVM} to implement {PPARDB\slash PVM}, a portable parallel database management system",
  crossref =     "Bode:1996:PVM",
  pages =        "108--??",
  year =         "1996",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C6110B (Software engineering techniques); C6110P (Parallel programming); C7430 (Computer engineering); C6160B (Distributed databases)",
  corpsource =   "Dept. of Electr. Eng., Nat. Tech. Univ.
of Athens, Greece", keywords = "distributed databases; heterogeneous workstation network; operator parallelism; parallel programming; parallel systems; portability; portable communication primitives; portable parallel database management; PPARDB/PVM; PVM; separate database node; shared nothing architecture; software; system; virtual machines; workstations", pubcountry = "Germany", treatment = "P Practical", } @Article{Pernice:1996:RPP, author = "Michael Pernice", title = "Review of ``{PVM: Parallel Virtual Machine. A User's Guide and Tutorial for Networked Parallel Computing}''", journal = j-IEEE-PAR-DIST-TECH, volume = "4", number = "1", pages = "84--84", month = "Spring", year = "1996", CODEN = "IPDTEX", DOI = "https://doi.org/10.1109/M-PDT.1996.481711", ISSN = "1063-6552 (print), 1558-1861 (electronic)", ISSN-L = "1063-6552", bibdate = "Tue Jan 23 16:38:43 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://dlib.computer.org/pd/books/pd1996/pdf/p1084.pdf", acknowledgement = ack-nhfb, fjournal = "IEEE parallel and distributed technology: systems and applications", } @Article{Pokorny:1996:CMP, author = "S. 
Pokorny", title = "A Comparison of Message-Passing Parallelization to Shared-Memory Parallelization", journal = j-LECT-NOTES-COMP-SCI, volume = "1156", pages = "22--??", month = "????", year = "1996", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Pruyne:1996:ICP, author = "Jim Pruyne and Miron Livny", title = "Interfacing {Condor} and {PVM} to harness the cycles of workstation clusters", journal = j-FUT-GEN-COMP-SYS, volume = "12", number = "1", pages = "67--85", month = may, year = "1996", CODEN = "FGSEVI", ISSN = "0167-739X (print), 1872-7115 (electronic)", ISSN-L = "0167-739X", bibdate = "Fri Jul 15 09:06:07 MDT 2005", bibsource = "ftp://ftp.ira.uka.de/bibliography/Parallel/pvm.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sciencedirect.com/science/journal/0167739X", acknowledgement = ack-nhfb, classification = "C5620 (Computer networks and techniques); C6110P (Parallel programming); C6150J (Operating systems); C6150N (Distributed systems software)", corpsource = "Dept. of Comput. Sci., Wisconsin Univ., Madison, WI, USA", fjournal = "Future Generation Computer Systems", journal-URL = "http://www.sciencedirect.com/science/journal/0167739X", keywords = "allocation; Condor; network operating systems; parallel; parallel programming; processing; PVM; resource; resource management; resource management system; workstation clusters", pubcountry = "Netherlands", remark = "Resource Management in Distributed Systems", treatment = "P Practical", } @Article{Qaddouri:1996:CPC, author = "A. Qaddouri and R. Roy and M. Mayrand and B. 
Goulard", title = "Collision Probability Calculation and Multigroup Flux Solvers Using {PVM}", journal = j-NUCL-SCI-ENG, volume = "123", number = "3", pages = "392--402", month = jul, year = "1996", CODEN = "NSENAO", ISSN = "0029-5639", ISSN-L = "0029-5639", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "A0260 (Numerical approximation and analysis); A2820H (Neutron diffusion); A2841C (Computer codes for fission reactor theory and design)", conflocation = "Portland, OR, USA; 30 April-4 May 1995", conftitle = "International Conference on Mathematics and Computations, Reactor Physics, and Environmental Analyses", corpsource = "Inst. de Genie Nucl., Ecole Polytech. de Montreal, Que., Canada", fjournal = "Nuclear Science and Engineering", keywords = "collision probability; cyclic; IBM SP2; iterative methods; iterative process; linearized; multigroup flux solvers; multigroup transport equation; neutron flux; neutron transport theory; nuclear engineering computing; PVM library; run times; SPARC 1000; time-independent transport equation; tracking; two-step energy/space", sponsororg = "ANS; Eur. Nucl. Soc.; Atomic Energy Soc. Japan", treatment = "T Theoretical or Mathematical", } @InProceedings{Ragg:1996:PEN, author = "T. 
Ragg", title = "Parallelization of an evolutionary neural network optimizer based on {PVM}", crossref = "Bode:1996:PVM", pages = "351--??", year = "1996", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C1180 (Optimisation techniques); C4240P (Parallel programming and algorithm theory); C5290 (Neural computing techniques); C6150J (Operating systems); C6150N (Distributed systems software)", corpsource = "Institut fur Logik, Karlsruhe Univ., Germany", keywords = "allocation; batch processing (computers); batch program; dynamic load balancing; ENZO; evolutionary neural network optimizer; genetic algorithms; machine load; nets; neural; parallel algorithms; parallelization; pattern recognition; PVM; resource; workstation-cluster", pubcountry = "Germany", treatment = "P Practical; T Theoretical or Mathematical", } @InProceedings{Reimann:1996:CBT, author = "D. A. Reimann and V. Chaudhary and M. J. Flynn and I. K. Sethi", title = "Cone beam tomography using {MPI} on heterogeneous workstation clusters", crossref = "IEEE:1996:PSM", pages = "142--148", year = "1996", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C5260B (Computer vision and image processing techniques); C5440 (Multiprocessing systems); C5620L (Local area networks); C6150N (Distributed systems software); C7410H (Computerised instrumentation)", conftitle = "Proceedings. 
Second MPI Developer's Conference", keywords = "application program interfaces; asynchronous communication; asynchronous MPI; backprojection; computerised tomography; cone beam tomography; heterogeneous workstation clusters; image reconstruction; load balancing; local area networks; memory requirements; message passing; Message Passing Interface; MPI; parallel methods; processing time; processor utilization; projection views; resource allocation; software libraries", sponsororg = "IEEE Comput. Soc. Tech. Committee on Distributed Process", treatment = "P Practical; T Theoretical or Mathematical", } @InProceedings{Robinson:1996:TMI, author = "J. Robinson and S. H. Russ and B. Flachs and B. Heckel", title = "A task migration implementation of the {Message-Passing Interface}", crossref = "IEEE:1996:PFI", pages = "61--68", year = "1996", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C6110P (Parallel programming); C6150E (General utility programs); C6150N (Distributed systems software)", conftitle = "Proceedings of 5th IEEE International Symposium on High Performance Distributed Computing", corpsource = "NSF Eng. Res. Center for Comput. Field Simulation, Mississippi State Univ., MS, USA", keywords = "application program interfaces; Hector; heterogeneous computing task allocator; heterogeneous platforms; message passing; Message-Passing Interface; MPI specification; parallel processing applications; parallel program performance improvement; parallel programming; software performance evaluation; task migration implementation; workstation networks", sponsororg = "IEEE Comput. Soc. Tech. Committee on Distributed Process.; Northeast Parallel Architectures Center; New York State Center for Adv. Technol. Comput. Applications and Software Eng. (CASE Center) at Syracuse Univ.; Rome Lab", treatment = "P Practical", } @InProceedings{Roda:1996:PEI, author = "J. Roda and J. 
Herrera and J. Gonzalez and C. Rodriguez and F. Almeida and D. Gonzalez",
  title =        "Practical experiments to improve {PVM} algorithms",
  crossref =     "Bode:1996:PVM",
  pages =        "30--??",
  year =         "1996",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C6150N (Distributed systems software); C6155 (Computer communications software); C7430 (Computer engineering)",
  corpsource =   "Univ. de La Laguna, Spain",
  keywords =     "broadcasting strategies; computer communications software; intensive communication experiments; LAN; local area networks; measurement; network parameter; parallel processing; parallel virtual machine; PVM algorithms; pvm.mcast; pvm.send; virtual machines",
  pubcountry =   "Germany",
  treatment =    "X Experimental",
}

@InProceedings{Russ:1996:HAT,
  author =       "S. H. Russ and B. Flachs and J. Robinson and B. Heckel",
  title =        "Hector: automated task allocation for {MPI}",
  crossref =     "IEEE:1996:PII",
  pages =        "344--348",
  year =         "1996",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C5440 (Multiprocessing systems); C5620L (Local area networks); C6110P (Parallel programming); C6150N (Distributed systems software)",
  conftitle =    "Proceedings of International Conference on Parallel Processing",
  corpsource =   "Mississippi State Univ., MS, USA",
  keywords =     "automated task allocation; automatic run-time performance optimization; C; computational resources; FORTRAN; Hector; local area networks; message passing; message-passing; MPI; multiprocessing systems; parallel processing; parallel programming; resource allocation; software performance evaluation; task migration; workstation networks",
  sponsororg =   "IEEE Comput. Tech. Committee on Parallel Process.; ACM SIGARCH",
  treatment =    "P Practical",
}

@InProceedings{Santana:1996:PVM,
  author =       "M. S. Santana and P. S. Souza and R. C.
Santana and S. S. Souza",
  title =        "{Parallel Virtual Machine} for {Windows95}",
  crossref =     "Bode:1996:PVM",
  pages =        "288--??",
  year =         "1996",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C5620L (Local area networks); C6110P (Parallel programming); C6150J (Operating systems); C6150N (Distributed systems software)",
  corpsource =   "Inst. de Ciencias Math., Sao Carlos, Brazil",
  keywords =     "(computers); communication; local area networks; message; message passing; network; operating systems; parallel applications; parallel programming; Parallel Virtual Machine for Windows 95; passing environment; personal computers; PVM-W95; speedups; Windows 95 operating system; workstations",
  pubcountry =   "Germany",
  treatment =    "P Practical",
  xxauthor =     "M. S. Santana and R. C. Santana and P. S. Souza and S. S. Souza",
}

@Article{Schuele:1996:PLA,
  author =       "J. Schuele",
  title =        "Parallel {Lanczos} Algorithm on a {CRAY-T3D} Combining {PVM} and {SHMEM} Routines",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1156",
  pages =        "158--165",
  month =        "????",
  year =         "1996",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}

@InProceedings{Schule:1996:PLA,
  author =       "J. Schule",
  title =        "Parallel {Lanczos} algorithm on a {CRAY-T3D} combining {PVM} and {SHMEM} routines",
  crossref =     "Bode:1996:PVM",
  pages =        "158--165",
  year =         "1996",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "A7130 (Metal-insulator transitions); A7155J (Localization in disordered structures); C4140 (Linear algebra); C6110P (Parallel programming); C7320 (Physics and chemistry computing)",
  corpsource =   "Inst. for Sci.
Comput., Braunschweig, Germany", keywords = "Anderson model; Anderson-; arithmetic; coarse-grain; Cray computers; CRAY-; cubic tungsten bronzes; data distribution; dynamic group; effectiveness; eigenfunctions; eigenvalues and; eigenvector calculation; fine-grain; finite precision; Fock approach; Hamiltonian matrix; HF calculations; Hubbard model; iterations; iterative; Krylov sequence; load imbalance; machines; master-slave paradigm; matrix algebra; membership; metal-insulator transition; methods; Mott-Hubbard model; NaWO/sub 3/; parallel algorithms; parallel Lanczos algorithm; parallelisation strategies; parallelism; physics computing; PVM; resource allocation; rounding errors; roundoff errors; SHMEM routines; sodium compounds; software development costs; stochastic distributions; subroutines; T3D; tridiagonalisation; unrestricted Hartree-; virtual", pubcountry = "Germany", treatment = "P Practical", } @InProceedings{Sener:1996:DPP, author = "C. Sener and Y. Paker and A. Kiper", title = "Data-parallel programming on {Helios}, parallel environment and {PVM}", crossref = "Yetongnon:1996:PII", volume = "1", pages = "2--??", year = "1996", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C6110P (Parallel programming); C6115 (Programming support); C6120 (File organisation); C6150N (Distributed systems software)", corpsource = "Dept. of Comput. Eng., Middle East Tech. 
Univ., Ankara, Turkey", keywords = "C language interface; column-sums; computational complexity; data; data flow computing; data-parallel; data-parallel programming; environments; evaluation; Helios operating system; IBM SP/2 system; image processing; matrix; message passing; network; operating systems; parallel environment; parallel programming; Parallel Virtual Machine; performance; portability; programming; programming tool; PVM; resource allocation; software performance; software portability; software tools; speed-up curves; structures; transputers; type; virtual machines", sponsororg = "ISCA; IEEE Comput. Soc.; IEEE Tech. Committee on Operating Syst.; et al", treatment = "P Practical", } @Article{Shyu:1996:ILQ, author = "Shyong Jian Shyu and H. K.-C. Chang and K.-C. Chou", title = "Implementation of a linear quadtree coding scheme on the parallel virtual machine", journal = j-INT-J-HIGH-SPEED-COMPUTING, volume = "8", number = "1", pages = "65--79", month = mar, year = "1996", CODEN = "IHSCEZ", ISSN = "0129-0533", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "B6140C (Optical information, image and video signal processing); C1250 (Pattern recognition); C5260B (Computer vision and image processing techniques); C5440 (Multiprocessing systems); C6120 (File organisation); C7430 (Computer engineering)", corpsource = "Dept. of Inf. Manage., Ming Chuan Univ., Taipei, Taiwan", fjournal = "International Journal of High Speed Computing", keywords = "data; encoding; high; image encoding problem; image manipulations; image processing; image processing problems; linear quadtree coding; master-slave paradigm; parallel machines; parallel virtual machine; performance computing; quadtrees; structure; virtual machines", pubcountry = "Singapore", treatment = "A Application; P Practical", } @InProceedings{Silva:1996:IDS, author = "L. M. Silva and J. G. Silva and S. 
Chapple", title = "Implementing distributed shared memory on top of {MPI}: the {DSMPI} library", crossref = "IEEE:1996:PFE", pages = "50--57", year = "1996", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C5440 (Multiprocessing systems); C6110P (Parallel programming); C6115 (Programming support); C6150N (Distributed systems software)", conftitle = "Proceedings of 4th Euromicro Workshop on Parallel and Distributed Processing", corpsource = "Coimbra Univ., Portugal", keywords = "coherence protocols; consistency; Cray T3D; distributed memory machines; distributed memory systems; distributed shared memory; DSMPI library; message passing; MPI; parallel library; parallel programming; performance; programming interface; scalability; shared memory systems; software libraries; software performance evaluation; software portability; workstation network", treatment = "P Practical", } @InProceedings{Sitsky:1996:IMU, author = "D. Sitsky and P. Mackerras and A. Tridgell and D. Walsh", title = "Implementing {MPI} under {AP\slash Linux}", crossref = "IEEE:1996:PSM", pages = "32--39", year = "1996", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "A preliminary MPI library has been implemented for the Fujitsu AP1000+ multicomputer running the AP/Linux operating system. Under this environment, parallel programs may be dedicated to a fixed partition, or a number of parallel programs may share a partition. Therefore, the MPI library has been constructed so that messaging operations can be driven by polling and/or interrupt techniques. It has been found that polling works well when a single parallel program is running on a given partition, and that interrupt-driven communication makes far better use of the machine when multiple parallel programs are executing. 
Gang scheduling of multiple parallel programs which use polling was found to be relatively ineffective.",
  acknowledgement = ack-nhfb,
  classification = "C5220P (Parallel architecture); C5440 (Multiprocessing systems); C6110P (Parallel programming); C6140D (High level languages); C6150E (General utility programs); C6150G (Diagnostic, testing, debugging and evaluating systems); C6150J (Operating systems); C6115 (Programming support); C6150N (Distributed systems software)",
  conflocation = "Notre Dame, IN, USA; 1-2 July 1996",
  conftitle =    "Proceedings. Second MPI Developer's Conference",
  corpsource =   "CAP Res. Program, Australian Nat. Univ., Canberra, ACT, Australia",
  keywords =     "AP/Linux; AP/Linux operating system; application program interfaces; communication; debugging; distributed memory systems; extensions; Fujitsu AP1000+ multicomputer; gang scheduling; interrupt techniques; interrupt-driven; interrupt-driven communication; interrupts; language issues; libraries; message passing; Message Passing Interface standard; messaging operations; MPI applications; MPI implementations; MPI library; multiple parallel programs; operating system; operating systems (computers); parallel languages; parallel programming; parallel programs; partition; performance evaluation; performance portable parallel programming; polling; processor scheduling; program debugging; software; software libraries; software performance evaluation; software standards; utility programs",
  sponsororg =   "IEEE Comput. Soc. Tech. Committee on Distributed Process",
  treatment =    "P Practical",
}

@InProceedings{Sitsky:1996:MLW,
  author =       "D. Sitsky and E.
Hayashi", title = "An {MPI} library which uses polling, interrupts and remote copying for the {Fujitsu AP1000+}", crossref = "Li:1996:PSI", pages = "43--49", year = "1996", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C5220P (Parallel architecture); C5470 (Performance evaluation and testing); C6110B (Software engineering techniques); C6115 (Programming support); C6150N (Distributed systems software)", conftitle = "Proceedings Second International Symposium on Parallel Architectures, Algorithms, and Networks (I-SPAN'96)", corpsource = "Dept. of Comput. Sci., Australian Nat. Univ., Canberra, ACT, Australia", keywords = "Fujitsu AP1000+; interrupts; message passing; MPI library; parallel architectures; performance; performance evaluation; polling; remote copying; software libraries; software portability", sponsororg = "Chinese Nat. Res. Center for Intelligent Comput. Syst.; IEEE Comput. Soc.; IEEE Comput. Soc. Tech. Committee on Parallel Process.; Steering Committee of the Chinese Nat. Hi-Tech Programme; Inf. Process. Soc. Japan; Chinese Comput. Federation; IEICE Inf. and Syst. Soc", treatment = "P Practical", } @InProceedings{Sivaraman:1996:AAD, author = "H. Sivaraman and C. S. Raghavendra", title = "{ADDT}: Automatic Data Distribution Tool for Porting Programs to {PVM}", crossref = "El-Rewini:1996:PTN", volume = "1", pages = "557--564", year = "1996", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C5440 (Multiprocessing systems); C6110P (Parallel programming); C6140D (High level languages); C6150C (Compilers, interpreters and other processors); C6150N (Distributed systems software)", corpsource = "School of Electr. Eng. and Comput. 
Sci., Washington State Univ., Pullman, WA, USA", keywords = "ADDT; automatic data distribution tool; communication latency; communication statements; data access; data parallel languages; data partitioning; distributed memory; distributed memory systems; distribution blocks; environment; FORTRAN; Fortran; High Performance; HPF; interpreters; languages; linear optimization problem; nonlocal; optimisation; parallel; parallel programming; program; program compilers; program porting; PVM; shared memory parallel program; shared memory systems; software portability", sponsororg = "Univ. Hawaii; Univ. Hawaii College of Bus. Adm", treatment = "P Practical", } @InProceedings{Skjellum:1996:TTM, author = "A. Skjellum and B. Protopopov and S. Hebert", title = "A thread taxonomy for {MPI}", crossref = "IEEE:1996:PSM", pages = "50--57", year = "1996", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C6110B (Software engineering techniques); C6110F (Formal methods); C6150E (General utility programs); C6150J (Operating systems); C6150N (Distributed systems software)", conftitle = "Proceedings. Second MPI Developer's Conference", corpsource = "Dept. of Comput. Sci., Mississippi State Univ., MS, USA", keywords = "API extensions; application program interfaces; Channel Device; computational unit; fine-grain concurrency; formal specification; message passing; minimal portable thread management; MPI; MPICH; multi-threaded thread-safe ADI; non-thread-safe MPI call semantics; resource container; software portability; synchronisation; synchronization mechanisms; thread models; thread safety; thread taxonomy; user-level mechanism; utility programs; Windows NT version", sponsororg = "IEEE Comput. Soc. Tech. Committee on Distributed Process", treatment = "P Practical", } @InProceedings{Smith:1996:UWC, author = "N. P. G. Smith and C. 
Christopoulos", title = "Utilising workstation clusters with {PVM} for the solution of large {TLM} problems", crossref = "Silvester:1996:SEE", pages = "3--11", year = "1996", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "B1130B (Computer-aided circuit analysis and design); B5240 (Transmission line theory); C6110P (Parallel programming); C7410 (Electrical engineering computing)", corpsource = "Numerical Modelling Group, Nottingham Univ., UK", keywords = "data; electrical engineering computing; load balancing; parallel computing; parallel programming; Parallel Virtual Machine; partitioning; PVM; transmission line matrix methods; Transmission Line Modelling; virtual machines; workstation clusters", pubcountry = "UK", sponsororg = "IEE; Univ. Florence", treatment = "P Practical", } @Book{Snir:1996:MCR, author = "Marc Snir and Steve W. Otto and Steven Huss-Lederman and David W. Walker and Jack Dongarra", title = "{MPI}: the complete reference", publisher = pub-MIT, address = pub-MIT:adr, pages = "xii + 336", year = "1996", ISBN = "0-262-69184-1", ISBN-13 = "978-0-262-69184-0", LCCN = "QA76.642.M65 1996", bibdate = "Fri Jan 31 07:16:14 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", price = "US\$27.50", acknowledgement = ack-nhfb, } @InProceedings{Soch:1996:PCG, author = "M. Soch and J. Trdlicka and P. 
Tvrdik", title = "{PVM}, computational geometry, and parallel computing course", crossref = "Bode:1996:PVM", pages = "38--??", year = "1996", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C0220 (Computing education and training); C4240P (Parallel programming and algorithm theory); C4260 (Computational geometry); C5440 (Multiprocessing systems); C6110P (Parallel programming); C6130B (Graphics techniques); C7310 (Mathematics computing); C7810C (Computer-aided instruction)", corpsource = "Dept. of Comput. Sci. and Eng., Czech Tech. Univ., Prague, Czech Republic", keywords = "computational geometry; computer science education; courseware; distributed memory machines; distributed memory systems; educational courses; laboratories; mathematics computing; non-trivial parallel algorithms; parallel algorithms; parallel computing course; parallel programming; programming tool; projects; student PVM; students; SUN; teaching; term projects; upper level undergraduate; workstation lab", pubcountry = "Germany", treatment = "P Practical", } @InProceedings{Squyres:1996:CBP, author = "J. M. Squyres and A. Lumsdaine and R. L. Stevenson", title = "A cluster-based parallel image processing toolkit", crossref = "Grinstein:1996:VDE", volume = "2421", pages = "228--239", year = "1996", CODEN = "PSISDG", ISSN = "0277-786X (print), 1996-756X (electronic)", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Lab. for Sci. 
Comput., Notre Dame Univ., IN, USA", classification = "B6140C (Optical information, image and video signal processing); C5260B (Computer vision and image processing techniques); C6110B (Software engineering techniques); C6115 (Programming support); C6150N (Distributed systems software)", keywords = "Cluster-based computing; Data I/O; Data locality; Load balancing; Message passing interface; MPI standard; Network technologies; Parallel computing resource; Parallel image processing software library; Parallel image processing toolkit; Sequential image processing; Specialized massively parallel computing hardware; Visualization; Workstation clusters", thesaurus = "Computer networks; Image processing; Message passing; Parallel processing; Software libraries; Software tools; Workstations", } @Article{Stathopoulos:1996:PIM, author = "Andreas Stathopoulos and Anders B. Ynnerman and Charlotte {Froese Fischer}", title = "A {PVM} Implementation of the {MCHF} Atomic Structure Package", journal = j-IJSAHPC, volume = "10", number = "1", pages = "41--61", month = "Spring", year = "1996", CODEN = "IJSCFG", ISSN = "1078-3482", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C5440 (Multiprocessing systems); C6150N (Distributed systems software); C7310 (Mathematics computing); C7320 (Physics and chemistry computing); C7400 (Engineering computing)", corpsource = "Dept. of Comput. 
Sci., Vanderbilt Univ., Nashville, TN, USA", fjournal = "International Journal of Supercomputer Applications and High Performance Computing", keywords = "algorithmic; atomic data; atomic structure; calculations; computational demands; computing; CPU; disk space; engineering; engineering computing; evaluation; high-end workstation cluster; IBM SP2 multiprocessor; improvements; mathematics; MCHF atomic; Multiconfiguration Hartree--Fock package; parallel computers; parallel machines; parallel programming; Parallel Virtual Machine; physics; prime memory; problem size; PVM implementation; PVM programming; science; software packages; software performance; speed; structure package; user interfaces; user-friendly interface", treatment = "A Application; P Practical", }

@InProceedings{Steed:1996:PPP,
  author =       "M. R. Steed and M. J. Clement",
  title =        "Performance prediction of {PVM} programs",
  crossref =     "IEEE:1996:PII",
  pages =        "803--807",
  year =         "1996",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C5620L (Local area networks); C6110P (Parallel
                 programming); C6150G (Diagnostic, testing, debugging
                 and evaluating systems); C6150N (Distributed systems
                 software)",
  corpsource =   "Dept. of Comput. Sci., Brigham Young Univ., Provo, UT,
                 USA",
  keywords =     "analysis; APACHE; Automated; clusters; debugging;
                 evaluation; local area networks; message passing
                 library; Parallel; parallel; parallel computing;
                 parallel programming; performance; performance tools;
                 program; program debugging; programming; PVM
                 Application Characterization Environment; PVM programs;
                 scalable parallel applications; software libraries;
                 software performance; software performance prediction;
                 software reusability; software tools; Virtual Machine;
                 workstation",
  sponsororg =   "IEEE Comput. Tech. Committee on Parallel Process.; ACM
                 SIGARCH",
  treatment =    "P Practical",
}

@InProceedings{Stellner:1996:CCP, author = "G.
Stellner", title = "{CoCheck}: checkpointing and process migration for {MPI}", crossref = "IEEE:1996:PII", pages = "526--531", year = "1996", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C5440 (Multiprocessing systems); C5620L (Local area networks); C6150N (Distributed systems software)", conftitle = "Proceedings of International Conference on Parallel Processing", corpsource = "Inst. fur Inf., Tech. Univ. Munchen, Germany", keywords = "checkpointing; CoCheck; consistency; LAN; local area networks; message passing; message passing library; MPI; parallel applications; parallel machines; process migration; resource allocation; single process checkpointer; software libraries; workstation networks", sponsororg = "IEEE Comput. Tech. Committee on Parallel Process.; ACM SIGARCH", treatment = "P Practical", } @InProceedings{Stone:1996:RNF, author = "J. Stone and M. Underwood", title = "Rendering of numerical flow simulations using {MPI}", crossref = "IEEE:1996:PSM", pages = "138--141", year = "1996", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C5220P (Parallel architecture); C6130B (Graphics techniques); C6150N (Distributed systems software); C6185 (Simulation techniques); C7320 (Physics and chemistry computing)", conftitle = "Proceedings. Second MPI Developer's Conference", corpsource = "Dept. of Comput. 
Sci., Missouri Univ., Rolla, MO, USA", keywords = "application program interfaces; computational fluid dynamics code; data visualisation; dedicated graphics workstations; digital simulation; disks; fluid dynamics; IBM SP2; in-place rendering; Intel iPSC/860; Intel Paragon; message passing; Message Passing Interface; MPI; networked graphics workstations; numerical flow simulations; parallel architectures; physics computing; ray tracing; ray tracing library; rendering (computer graphics); run-time visualization; software libraries; workstation networks", sponsororg = "IEEE Comput. Soc. Tech. Committee on Distributed Process", treatment = "A Application; P Practical", } @InProceedings{Strietzel:1996:PTS, author = "M. Strietzel", title = "Parallel turbulence simulation based on {MPI}", crossref = "Liddell:1996:HPC", pages = "283--289", year = "1996", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "A0260 (Numerical approximation and analysis); A0270 (Computational techniques); A4710 (General fluid dynamics theory, simulation and other computational methods); A4725 (Turbulent flows, convection, and heat transfer); C4240P (Parallel programming and algorithm theory); C6110P (Parallel programming); C7320 (Physics and chemistry computing)", conftitle = "High-Performance Computing and Networking. 
International Conference and Exhibition HPCN Europe 1996", corpsource = "Zentrum fur Paralleles Rechnen, Koln Univ., Germany", keywords = "direct numerical simulation; divide and conquer method; divide and conquer methods; domain decomposition; flow simulation; large-eddy simulation; message passing; message passing platform; MPI; numerical analysis; parallel algorithms; parallel turbulence simulation; parallelization strategy; physics computing; Poisson equation; three dimensional incompressible Navier--Stokes equation; turbulence; turbulent fluids", treatment = "T Theoretical or Mathematical", } @Article{Subramaniam:1996:CLU, author = "Krishnan R. Subramaniam and Suraj C. Kothari and Don Heller", title = "A Communication Library Using Active Messages to Improve Performance of {PVM}", journal = j-J-PAR-DIST-COMP, volume = "39", number = "2", pages = "146--152", day = "15", month = dec, year = "1996", CODEN = "JPDCER", DOI = "https://doi.org/10.1006/jpdc.1996.0162", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Thu Mar 9 09:19:01 MST 2000", bibsource = "http://www.idealibrary.com/servlet/useragent?func=showAllIssues&curIssueID=jpdc; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.idealibrary.com/links/doi/10.1006/jpdc.1996.0162/production; http://www.idealibrary.com/links/doi/10.1006/jpdc.1996.0162/production/pdf", acknowledgement = ack-nhfb, classification = "C6110B (Software engineering techniques); C6150N (Distributed systems software)", corpsource = "Dept. of Comput. 
Sci., Iowa State Univ., Ames, IA, USA", fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", keywords = "active messages; communication; communication library; controlled; message passing; parallel processing; polling; primitives; signal driven message notification; software libraries; software performance evaluation; virtual machines", treatment = "P Practical", }

@InProceedings{Sunderam:1996:PSS,
  author =       "V. Sunderam",
  title =        "The {PVM} system: status, trends, and directions",
  crossref =     "Bode:1996:PVM",
  pages =        "68--??",
  year =         "1996",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C6110P (Parallel programming); C6150N (Distributed
                 systems software); C7430 (Computer engineering)",
  corpsource =   "Dept. of Math. and Comput. Sci., Emory Univ., Atlanta,
                 GA, USA",
  keywords =     "API; application program interfaces; environments;
                 future directions; heterogeneous; heterogeneous
                 concurrent computing; high performance computing;
                 network computing; parallel programming; Parallel
                 Virtual Machine; programming model; PVM system; robust
                 portable implementations; software framework; virtual
                 machines",
  pubcountry =   "Germany",
  treatment =    "P Practical",
}

@Article{Suttner:1996:SPB, author = "C. B. Suttner", title = "{SPTHEO} --- a {PVM-based} parallel theorem prover", journal = j-LECT-NOTES-COMP-SCI, volume = "1156", pages = "116--125", month = "????", year = "1996", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C1160 (Combinatorial mathematics); C1180 (Optimisation techniques); C4210 (Formal logic); C6110P (Parallel programming); C7430 (Computer engineering)", corpsource = "Inst. fur Inf., Tech. Univ.
Munchen, Germany", fjournal = "Lecture Notes in Computer Science", keywords = "communication aspects; environments; high latency; message passing; parallel programming; parallel search; parallelization; portable implementation; proof; PVM based parallel theorem prover; PVM message passing system; search problems; SETHEO; SPS model; SPTHEO; system; theorem proving; virtual machines; workstation networks", pubcountry = "Germany", treatment = "P Practical", }

@InProceedings{Theodoropoulos:1996:ESP,
  author =       "P. Theodoropoulos and G. Manis and P. Tsanakas and G.
                 Papakonstantinou",
  title =        "Extending synchronization {PVM} mechanisms",
  crossref =     "Bode:1996:PVM",
  pages =        "315--??",
  year =         "1996",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C6110P (Parallel programming); C6150J (Operating
                 systems); C6150N (Distributed systems software); C7430
                 (Computer engineering)",
  corpsource =   "Dept. of Electr. Eng., Nat. Tech. Univ. of Athens,
                 Greece",
  keywords =     "barriers; global semaphores; message; message passing;
                 operating system; operating systems (computers); Orchid
                 platform; parallel; parallel virtual machine; passing;
                 primitives; programming; synchronisation;
                 synchronization; virtual machines",
  pubcountry =   "Germany",
  treatment =    "P Practical",
}

@InProceedings{Touhafi:1996:DPC, author = "A. Touhafi and W. Brissinck and E. F. Dirkx", title = "Development of {PVM} code for a low latency switch based interconnect", crossref = "Bode:1996:PVM", pages = "229--??", year = "1996", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C5440 (Multiprocessing systems); C6110P (Parallel programming); C6150J (Operating systems); C6150N (Distributed systems software); C7430 (Computer engineering)", corpsource = "V.U.B.
TW-INFO, Brussels, Belgium", keywords = "communication; device driver; device drivers; distributed parallel computing; end latency; Ethernet; fast switch based network; intensive applications; interchanged messages; interconnection networks; low latency switch based interconnect; machines; message; message passing; message passing tool; multiprocessor; parallel programming; passing; PVM code; PVM routines; small end-to-; virtual", pubcountry = "Germany", treatment = "P Practical", }

@Article{Twerda:1996:PIT,
  author =       "A. Twerda and A. P. {Van den Berg} and A. J. {Van der
                 Steen}",
  title =        "Parallel implementation of time dependent
                 {Rayleigh-B{\'e}nard} convection",
  journal =      j-SUPERCOMPUTER,
  volume =       "12",
  number =       "2",
  pages =        "36--47",
  month =        mar,
  year =         "1996",
  CODEN =        "SPCOEL",
  ISSN =         "0168-7875",
  bibdate =      "Wed Mar 18 08:37:01 MST 1998",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Dept. of Geophys., Utrecht Univ., Netherlands",
  classification = "A4720 (Hydrodynamic stability and instability); C5440
                 (Multiprocessing systems); C7320 (Physics and chemistry
                 computing)",
  corpsource =   "Dept. of Geophys., Utrecht Univ., Netherlands",
  fjournal =     "Supercomputer",
  keywords =     "Cray T3D; distributed memory parallel systems; message
                 passing; message passing model; parallel
                 implementation; parallel models; parallel processing;
                 physics computing; PVM; Rayleigh-Benard instability;
                 scalability; time dependent Rayleigh-Benard
                 convection",
  pubcountry =   "Netherlands",
  treatment =    "A Application; P Practical",
}

@InProceedings{Uhl:1996:PIC, author = "A. Uhl and J.
Hammerle", title = "Parallel image compression on a workstation cluster using {PVM}", crossref = "Bode:1996:PVM", pages = "301--??", year = "1996", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "B6120B (Codes); B6140C (Optical information, image and video signal processing); C1250 (Pattern recognition); C4240P (Parallel programming and algorithm theory); C5440 (Multiprocessing systems)", corpsource = "Dept. of Comput. Sci. and Syst. Anal., Salzburg Univ., Austria", keywords = "data compression; FDDI; fractal image compression; image coding; interconnected workstation-cluster; parallel algorithms; parallel image compression; parallel machines; parallel meta-algorithm; PVM; virtual machines; workstation cluster", pubcountry = "Germany", treatment = "A Application; P Practical", }

@InProceedings{Wagner:1996:GSG,
  author =       "T. Wagner and C. Kueblbeck and C. Schittko",
  title =        "Genetic selection and generation of textural features
                 with {PVM}",
  crossref =     "Bode:1996:PVM",
  pages =        "305--??",
  year =         "1996",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "B0260 (Optimisation techniques); B6140C (Optical
                 information, image and video signal processing); C1180
                 (Optimisation techniques); C4240P (Parallel programming
                 and algorithm theory); C5260B (Computer vision and
                 image processing techniques); C7330 (Biology and
                 medical computing); C7430 (Computer engineering)",
  corpsource =   "Fraunhofer-Institut fur Integrierte Schaltungen,
                 Erlangen, Germany",
  keywords =     "algorithms; cell identification; Gallops PVM package;
                 genetic algorithms; genetic selection; medical image
                 processing; medical imaging; parallel; PVM; quality
                 control; surface inspection; systems; textural features
                 generation; textural image features; textured images;
                 tumor; virtual machines",
  pubcountry =   "Germany",
  treatment =    "A Application; P Practical",
}

@Article{Wagner:1996:PMM,
  author =       "J. C. Wagner and A. Haghighat",
  title =        "Parallel {MCNP Monte Carlo} transport calculations with
                 {MPI}",
  journal =      j-TRANS-AM-NUCL-SOC,
  volume =       "75",
  number =       "??",
  pages =        "338--339",
  month =        "????",
  year =         "1996",
  CODEN =        "TANSAO",
  ISSN =         "0003-018X",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "A0250 (Probability theory, stochastic processes, and
                 statistics); A0270 (Computational techniques); A2820H
                 (Neutron diffusion); A2841C (Computer codes for fission
                 reactor theory and design); C1140G (Monte Carlo
                 methods); C4240P (Parallel programming and algorithm
                 theory); C6110P (Parallel programming); C7470 (Nuclear
                 engineering computing)",
  conftitle =    "American Nuclear Society and the European Nuclear
                 Society 1996 International Conference on the Global
                 Benefits of Nuclear Technology and the Embedded Topical
                 Meetings. Low- Level Radiation Health Effects, DD and
                 R: Worldwide Experience- DD and R, What Does it Mean,
                 and International Nuclear Policy Issues (papers in
                 summary form only received)",
  corpsource =   "Pennsylvania State Univ., University Park, PA, USA",
  fjournal =     "Transactions of the American Nuclear Society",
  keywords =     "fission reactor kinetics; high-speed communication
                 switches; message passing; message-passing interface;
                 message-passing library package; message-passing
                 software package; Monte Carlo methods; MPI; neutron
                 transport theory; nuclear engineering computing;
                 parallel algorithms; parallel MCNP Monte Carlo
                 transport calculations; parallel Monte Carlo; parallel
                 virtual machine; workstation clusters",
  treatment =    "P Practical",
}

@TechReport{Walker:1996:MFA, author = "David W.
Walker", title = "{MPI}: from Fundamentals to Applications", institution = inst-ORNL, address = inst-ORNL:adr, year = "1996", bibdate = "Tue Jan 16 08:29:47 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.epm.ornl.gov/~walker/mpi/SLIDES/mpi-tutorial.html", }

@Misc{Walker:1996:MP,
  author =       "David W. Walker",
  title =        "{MPI2} Proposals",
  howpublished = "World-Wide Web",
  year =         "1996",
  bibdate =      "Tue Jan 16 08:33:57 1996",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.epm.ornl.gov/~walker/mpi/mpi2-proposals.html",
}

@Article{Walker:1996:MSM, author = "D. W. Walker and J. J. Dongarra", title = "{MPI}: a standard Message Passing Interface", journal = j-SUPERCOMPUTER, volume = "12", number = "1", pages = "56--68", month = jan, year = "1996", CODEN = "SPCOEL", ISSN = "0168-7875", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Math. Sci. Sect., Oak Ridge Nat. Lab., TN, USA", classification = "C5640 (Protocols); C6150N (Distributed systems software); C6155 (Computer communications software)", corpsource = "Math. Sci. Sect., Oak Ridge Nat.
Lab., TN, USA", fjournal = "Supercomputer", keywords = "collective communication routine; Collective communication routine; computer interface; Computer interface; computer interfaces; computer standard; Computer standard; distributed processing; Distributed processing; Europe; massively parallel computer; Massively parallel computer; message passing; message passing interface; Message passing interface; MPI; network interface; Network interface; network interfaces; parallel processing; Parallel processing; point-to-point; Point-to-point; protocols; software standards; standards; United States; workstation network; Workstation network", pubcountry = "Netherlands", thesaurus = "Computer interfaces; Distributed processing; Message passing; Network interfaces; Parallel processing; Protocols; Software standards; Standards", treatment = "P Practical", } @Article{Walker:1996:RBC, author = "D. W. Walker and S. W. Otto", title = "Redistribution of block-cyclic data distributions using {MPI}", journal = j-CPE, volume = "8", number = "9", pages = "707--728", month = nov, year = "1996", CODEN = "CPEXEI", ISSN = "1040-3108", ISSN-L = "1040-3108", bibdate = "Tue Sep 7 06:06:27 MDT 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www3.interscience.wiley.com/journalfinder.html", URL = "http://www3.interscience.wiley.com/cgi-bin/abstract?ID=23305", acknowledgement = ack-nhfb, classification = "C4240P (Parallel programming and algorithm theory); C6140D (High level languages); C6150N (Distributed systems software)", corpsource = "Math. Sci. Sect., Oak Ridge Nat. 
Lab., TN, USA", fjournal = "Concurrency, practice and experience", keywords = "block-cyclic data distributions; block-cyclic fashion; broadcast; collective communication operations; FORTRAN; High Performance Fortran; IBM SP-1; Intel Paragon; load balancing properties; message passing; message passing algorithms; MPI; MPI-ALLTOALL; parallel algorithms; processor scheduling; redistribution operation; resource allocation", treatment = "T Theoretical or Mathematical", }

%%% NOTE: in the classification value of the next entry, the heading
%%% for code A9365 itself contains a semicolon.  Do not split or sort
%%% that value on "; ": doing so separates the two halves of the
%%% heading and leaves the parentheses unbalanced.

@InProceedings{Wedemeijer:1996:PSA,
  author =       "H. Wedemeijer and H. L. H. Cox and D. J. Verschuur and
                 I. L. Ritsema",
  title =        "Parallelisation of seismic algorithms using {PVM} and
                 {FORGE}",
  crossref =     "Liddell:1996:HPC",
  pages =        "352--??",
  year =         "1996",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "A9130 (Seismology); A9130R (Controlled source
                 seismology); A9365 (Data and information; acquisition,
                 processing, storage and dissemination in geophysics);
                 A9385 (Instrumentation and techniques for geophysical,
                 hydrospheric and lower atmosphere research); C5260B
                 (Computer vision and image processing techniques);
                 C6110P (Parallel programming); C7340 (Geophysics
                 computing)",
  corpsource =   "TNO Inst. of Appl. Geosci., Delft, Netherlands",
  keywords =     "algorithms; Earth subsurface; explosion seismology;
                 FORGE; geophysical prospecting; geophysical signal
                 processing; geophysical techniques; geophysics
                 computing; imaging; implementation; measurement
                 technique; method; optimisation; parallel; parallel
                 programming; parallelisation; prospecting; PVM; seismic
                 algorithm; seismic reflection profiling; seismology;
                 signal processing",
  pubcountry =   "Germany",
  treatment =    "P Practical; T Theoretical or Mathematical",
}

@Article{Wilson:1996:SMS, author = "G. C. Wilson and T. H. Wood and J. L. Zyskind and J. W. Sulhoff and J. E. Johnson and T. Tanbun-Ek and P. A.
Morton", title = "{SBS} and {MPI} suppression in analogue systems with integrated electroabsorption modulator\slash {DFB} laser transmitters", journal = j-ELECT-LETTERS, volume = "32", number = "16", pages = "1502--1504", month = "????", year = "1996", CODEN = "ELLEAK", ISSN = "0013-5194 (print), 1350-911X (electronic)", ISSN-L = "0013-5194", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "B4150 (Electro- optical devices); B4360 (Laser applications); B6260 (Optical links and equipment); B6430D (CATV and wired systems)", corpsource = "Lucent Technol., Holmdel, NJ, USA", fjournal = "Electronics Letters", keywords = "1.7 GHz; analogue systems; cable television; CATV systems; chirp modulation; DFB laser transmitters; distributed feedback lasers; dithering; electro-optical modulation; electroabsorption; electroabsorption modulator; excess noise; integrated modulator/laser transmitters; interference suppression; laser beam applications; laser bias current; low-chirp modulation; MPI suppression; multipath interference; narrow linewidth sources; optical fibre communication; optical noise; optical transmitters; SBS suppression; stimulated Brillouin scattering", treatment = "X Experimental", }

%%% NOTE (review): the next four entries appear to be two pairs of
%%% duplicate records for the same two papers, one pair recorded in
%%% journal style and one in proceedings style (Wismueller:1996:SBV and
%%% Wismuller:1996:SBV; Wismueller:1996:TSI and Wismuller:1996:TSI).
%%% All four keys are kept so that existing citations continue to
%%% resolve.  The start pages of the proceedings-style records are
%%% copied from their journal-style twins; confirm against the printed
%%% volumes.

@Article{Wismueller:1996:SBV,
  author =       "R. Wism{\"u}ller",
  title =        "State Based Visualization of {PVM} Applications",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1156",
  pages =        "91--??",
  month =        "????",
  year =         "1996",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}

@Article{Wismueller:1996:TSI,
  author =       "R. Wism{\"u}ller and T. Ludwig",
  title =        "The Tool-Set --- An Integrated Tool Environment for
                 {PVM}",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1067",
  pages =        "1029--??",
  month =        "????",
  year =         "1996",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Wed Aug 14 09:02:28 MDT 1996",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}

@InProceedings{Wismuller:1996:SBV,
  author =       "R. Wism{\"u}ller",
  title =        "State based visualization of {PVM} applications",
  crossref =     "Bode:1996:PVM",
  pages =        "91--??",
  year =         "1996",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C6110P (Parallel programming); C6110V (Visual
                 programming); C6130B (Graphics techniques); C6150G
                 (Diagnostic, testing, debugging and evaluating
                 systems); C7430 (Computer engineering)",
  corpsource =   "Inst. fur Inf., Tech. Univ. Munchen, Germany",
  keywords =     "animated sequence; behavior; consistent; critical
                 issue; debugging; dynamic; event ordering algorithm;
                 global; global clock; optimization; parallel
                 programming; parallel programs; program diagnostics;
                 program flow; PVM applications; state based
                 visualization; state based visualizer; states; virtual
                 machines; VISTOP; visual programming; visualization;
                 visualization tool",
  pubcountry =   "Germany",
  treatment =    "P Practical",
}

@InProceedings{Wismuller:1996:TSI,
  author =       "R. Wism{\"u}ller and T. Ludwig",
  title =        "The {Tool Set} --- an integrated tool environment for
                 {PVM}",
  crossref =     "Liddell:1996:HPC",
  pages =        "1029--??",
  year =         "1996",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C6110P (Parallel programming); C6115 (Programming
                 support)",
  corpsource =   "Lehrstuhl fur Rechnertechnik und Rechnerorganisation,
                 Tech. Univ. Munchen, Germany",
  keywords =     "checkpointing; debugging; deterministic execution;
                 development; integrated tool environment; load
                 balancing; parallel I/O; parallel program; parallel
                 programming; performance analysis; program flow
                 visualization; programming environments; PVM; software;
                 Tool Set; tools",
  pubcountry =   "Germany",
  treatment =    "P Practical",
}

@InProceedings{Wolf:1996:CFS,
  author =       "K. Wolf and E. Brakkee",
  title =        "Coupling fluids and structures codes on {MPI}",
  crossref =     "IEEE:1996:PSM",
  pages =        "130--137",
  year =         "1996",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C6110B (Software engineering techniques); C6115
                 (Programming support); C6150N (Distributed systems
                 software); C6185 (Simulation techniques)",
  conftitle =    "Proceedings. Second MPI Developer's Conference",
  corpsource =   "German Nat. Res. Center for Comput. Sci., St. Augustin,
                 Germany",
  keywords =     "address-spaces; API; application program interface;
                 application program interfaces; computational power;
                 dedicated neighborhoods; digital simulation; dynamic
                 process sets; fluids codes; industrial simulation
                 applications; message passing; message passing
                 interface; message passing libraries; MPI; MPI-WORLDs;
                 parallel systems; portability; software libraries;
                 software portability; standalone address-spaces;
                 storage management; structures codes; synchronisation",
  sponsororg =   "IEEE Comput. Soc. Tech. Committee on Distributed
                 Process",
  treatment =    "P Practical",
}

@InProceedings{Worley:1996:MPE, author = "P. H.
Worley", title = "{MPI} performance evaluation and characterization using a compact application benchmark code", crossref = "IEEE:1996:PSM", pages = "170--177", year = "1996", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C5440 (Multiprocessing systems); C6150N (Distributed systems software)", conftitle = "Proceedings. Second MPI Developer's Conference", corpsource = "Oak Ridge Nat. Lab., TN, USA", keywords = "application program interface; application program interfaces; communication library; communication protocol; communication routines; communication-intensive application code; compact application benchmark code; Cray Research T3D; IBM SP2; Intel Paragon; message passing; message passing standard; MPI; parallel benchmark code; parallel machines; PSTSWM; software libraries; software performance evaluation; software standards", sponsororg = "IEEE Comput. Soc. Tech. Committee on Distributed Process", treatment = "P Practical", } @InProceedings{Xiong:1996:BID, author = "Jianxin Xiong and Dingxing Wang and Weimin Zheng and Meiming Shen", title = "{BUSTER}: an integrated debugger for {PVM}", crossref = "IEEE:1996:PIS", pages = "", year = "1996", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C5620L (Local area networks); C6110P (Parallel programming); C6115 (Programming support); C6150G (Diagnostic, testing, debugging and evaluating systems); C6150N (Distributed systems software)", corpsource = "Dept. of Comput. Sci. 
and Technol., Tsinghua Univ., Beijing, China", keywords = "BUSTER debugger; communication; debugging; global states; integrated parallel debugger; local area networks; nondeterminism; parallel; parallel programming; performance debugging; program; programming environments; PVM programming environment; related errors; workstation clusters; workstation network; workstations", treatment = "P Practical", } @Article{Xu:1996:MCO, author = "Zhiwei Xu and Kai Hwang", title = "Modeling Communication Overhead: {MPI} and {MPL} Performance on the {IBM SP2}", journal = j-IEEE-PAR-DIST-TECH, volume = "4", number = "1", pages = "9--24", month = "Spring", year = "1996", CODEN = "IPDTEX", DOI = "https://doi.org/10.1109/88.481662", ISSN = "1063-6552 (print), 1558-1861 (electronic)", ISSN-L = "1063-6552", bibdate = "Thu Apr 10 19:14:33 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C5440 (Multiprocessing systems); C5470 (Performance evaluation and testing)", corpsource = "Acad. 
Sinica, Beijing, China", fjournal = "IEEE parallel and distributed technology: systems and applications", keywords = "architectural bottlenecks; collective; collective communication; collective computation; communication; communication overhead modelling; communication performance evaluation; computation; IBM computers; IBM Message-Passing; IBM Message-Passing Library; IBM SP2; Library; machine size; massively parallel; massively parallel processors; message length; message passing; Message-Passing Interface; message-passing multicomputers; message-passing operations; MPI performance; MPL; MPL performance; optimization; overhead-; overhead- quantifying method; parallel applications; parallel applications optimization; parallel machines; performance; performance evaluation; point-to-point; point-to-point communication; processors; quantifying method; timing; timing measurements", treatment = "X Experimental", } @Article{Yoon:1996:WBP, author = "D.-K. Yoon and J.-L. Gaudiot", title = "Worker-Based Parallel Computing on {PVM}", journal = j-LECT-NOTES-COMP-SCI, volume = "1123", pages = "506--??", month = "????", year = "1996", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C5440 (Multiprocessing systems); C6150N (Distributed systems software)", corpsource = "Dept. of Electr. Eng. Syst., Univ. 
of Southern California, Los Angeles, CA, USA", fjournal = "Lecture Notes in Computer Science", keywords = "asynchronous tasks; high performance computing; message passing; networks of workstations; parallel function calls; parallel processing; parallel processing subsystem; Parallel Virtual Machine; primitives; run-time system; software package; software packages; user application programs; worker-based parallel computing", pubcountry = "Germany", treatment = "P Practical", xxpages = "2--??", xxvolume = "1", }

@Article{Yuasa:1996:RPG,
  author =       "F. Yuasa and S. Kawabata and T. Ishikawa and D.
                 Perret-Gallix and T. Kaneko",
  title =        "Running {PVM-GRACE} on Workstation Clusters",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1156",
  pages =        "335--??",
  month =        "????",
  year =         "1996",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C6150N (Distributed systems software); C7320 (Physics
                 and chemistry computing)",
  corpsource =   "Nat. Lab. for High Energy Phys., Ibaraki, Japan",
  fjournal =     "Lecture Notes in Computer Science",
  keywords =     "automatic Feynman diagram computation; event
                 generation; Feynman diagrams; high energy physics;
                 parallel machines; parallel virtual machine; physics
                 computing; PVM-GRACE; software packages; software
                 packet; virtual machines; workstation clusters",
  pubcountry =   "Germany",
  treatment =    "A Application; P Practical",
}

@InProceedings{Zambonelli:1996:EPP, author = "F. Zambonelli and M. Pugassi and L. Leonardi and N.
Scarabottolo", title = "Experiences on porting a {Parallel Objects} environment from a transputer network to a {PVM-based} system", crossref = "IEEE:1996:PFE", pages = "", year = "1996", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C6110B (Software engineering techniques); C6110J (Object-oriented programming); C6110P (Parallel programming); C6115 (Programming support); C6150N (Distributed systems software)", corpsource = "Dipartimento di Elettronica Inf. e Sistemistica, Bologna Univ., Italy", keywords = "computer aided software engineering; heterogeneous computer networks; massively parallel architecture; object-oriented; parallel object-oriented programming; Parallel Objects; parallel programming; programming; programming environments; PVM environment; software portability", treatment = "P Practical", }

@TechReport{Zhou:1996:FMP,
  author =       "Honbo Zhou and Al Geist",
  title =        "Faster Message Passing in {PVM}",
  institution =  "Mathematical Sciences Section, " # inst-ORNL,
  address =      inst-ORNL:adr,
  year =         "1996",
  pages =        "7",
  bibdate =      "Tue Jan 16 08:18:15 1996",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.epm.ornl.gov/~zhou/patm.ps",
}

@Article{Adamo:1997:AOO,
  author =       "J.-M. Adamo",
  title =        "{ARCH}, An Object Oriented {MPI}-Based Library for
                 Asynchronous and Loosely Synchronous Parallel System
                 Programming",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1332",
  pages =        "67--74",
  year =         "1997",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Thu Dec 9 06:27:54 MST 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}

@Article{Aguilar:1997:PMS, author = "J. Aguilar and T.
Jimenez", title = "A Processors Management System for {PVM}", journal = j-LECT-NOTES-COMP-SCI, volume = "1300", pages = "158--??", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Apr 28 08:51:33 MDT 1998", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Ahmad:1997:EVP, author = "Ishfaq Ahmad", title = "{Express} versus {PVM}: a performance comparison", journal = j-PARALLEL-COMPUTING, volume = "23", number = "6", pages = "783--812", day = "20", month = jun, year = "1997", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Fri Aug 6 10:15:27 MDT 1999", bibsource = "Compendex database; http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_free/browse/browse.cgi?year=1997&volume=23&issue=6; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_sub/browse/browse.cgi?year=1997&volume=23&issue=6&aid=1138", acknowledgement = ack-nhfb, affiliation = "Hong Kong Univ of Science and Technology", affiliationaddress = "Kowloon, Hong Kong", classification = "716.1; 722.4; 723; 723.1", fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", journalabr = "Parallel Comput", keywords = "Computer programming; Computer software portability; Computer workstations; Data communication systems; Hypercube computers; Interprocessor communication; Parallel algorithms; Parallel processing systems; Parallel virtual machine", } @Article{Alexandrov:1997:PMC, author = "V. Alexandrov and K. Chan and A. Gibbons and W. 
Rytter", title = "On the {PVM\slash MPI} Computations of Dynamic Programming Recurrences", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "305--312", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Alfaro:1997:FDW, author = "F. J. Alfaro and J. A. Gallud and J. L. Sanchez", title = "A Function to Dynamic Workload Allocation in Distributed Applications", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "219--225", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Alonso:1997:PBB, author = "J. L. Alonso and H. Schmidt and V. N. 
Alexandrov", title = "Parallel Branch and Bound Algorithms for Integer and Mixed Integer Linear Programming Problems under {PVM}", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "313--320", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Anonymous:1997:TNR, author = "Anonymous", title = "Technology News \& Reviews: {Chemkin} software; {OpenMP Fortran Standard}; {ODE} Toolbox for {Matlab}; {Java} products; {Scientific WorkPlace 3.0}", journal = j-IEEE-COMPUT-SCI-ENG, volume = "4", number = "4", pages = "75--??", month = oct # "\slash " # dec, year = "1997", CODEN = "ISCEE4", ISSN = "1070-9924 (print), 1558-190X (electronic)", ISSN-L = "1070-9924", bibdate = "Sat Jan 9 08:57:23 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/matlab.bib; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://dlib.computer.org/cs/books/cs1997/pdf/c4075.pdf", acknowledgement = ack-nhfb, fjournal = "IEEE Computational Science \& Engineering", journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=99", } @Article{Aversa:1997:MDP, author = "R. Aversa and G. Iannello and N. Mazzocca", title = "An {MPI} Driven Parallelization Strategy for Different Computing Platforms: a Case Study", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "401--408", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Bala:1997:PVQ, author = "P. Bala and T. Clark and P. Grochowski and B. 
Lesyng", title = "Parallel Version of a Quantum Classical Molecular Dynamics Code for Complex Molecular and Biomolecular Systems", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "409--416", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Baraglia:1997:IPW, author = "R. Baraglia and M. Cosso and D. Laforenza and M. Nicosia", title = "Integrating {PVaniM} into {WAMM} for Monitoring Meta-Applications", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "226--233", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Barbosa:1997:EUW, author = "J. G. Barbosa and A. J. Padilha and J.-P. Madier and T. Neubert", title = "Experiments on Using {WPVM} for Industrial Visual Inspection Problems", journal = j-LECT-NOTES-COMP-SCI, volume = "1300", pages = "828--??", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Apr 28 08:51:33 MDT 1998", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @InProceedings{Beazley:1997:EMP, author = "D. M. Beazley and P. S. 
Lomdahl", title = "Extensible message passing application development and debugging with {Python}", crossref = "IEEE:1997:PIP", pages = "650--655", year = "1997", bibdate = "Thu May 21 19:02:04 MDT 1998", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C5220P (Parallel architecture); C5440 (Multiprocessing systems); C6110P (Parallel programming); C6115 (Programming support); C6140D (High level languages); C6150C (Compilers, interpreters and other processors); C6150G (Diagnostic, testing, debugging and evaluating systems); C6150N (Distributed systems software)", conftitle = "Proceedings 11th International Parallel Processing Symposium", corpsource = "Dept. of Comput. Sci., Utah Univ., Salt Lake City, UT, USA", keywords = "application specific debugging; CM-5; Cray T3D; extensible message passing application debugging; extensible message passing application development; interpreted object oriented scripting language; large-scale message passing applications; message passing; message passing program writing; molecular dynamics application; MPI; multiprocessing systems; object-oriented languages; parallel machines; parallel programming; program debugging; program interpreters; Python parallelisation; rapid prototyping; software prototyping; Sun multiprocessor servers", sponsororg = "IEEE Comput. Soc. Tech. Committee on Parallel Process.; ACM SIGARCH; Eur. Assoc. Theor. Comput. Sci. (EATCS); Swiss Special Interest Group on Parallelism (SIPAR); SPEEDUP Soc", treatment = "P Practical", } @Article{Beisel:1997:EMD, author = "T. Beisel and E. Gabriel and M.
Resch", title = "An Extension to {MPI} for Distributed Computing on {MPPs}", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "75--82", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Bendtsen:1997:RLS, author = "C. Bendtsen and Z. Zlatev", title = "Running Large-Scale Air Pollution Models on Message Passing Machines", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "417--426", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Bhandarkar:1997:CRP, author = "Suchendra M. Bhandarkar and Salem Machaka", title = "Chromosome Reconstruction from Physical Maps Using a Cluster of Workstations", journal = j-J-SUPERCOMPUTING, volume = "11", number = "1", pages = "61--86", month = mar, year = "1997", CODEN = "JOSUED", DOI = "https://doi.org/10.1023/A:1007913429509", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Wed Jul 6 12:13:06 MDT 2005", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=11&issue=1; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.wkap.nl/issuetoc.htm/0920-8542+11+1+1997", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=11&issue=1&spage=61; http://www.wkap.nl/oasis.htm/141471", acknowledgement = ack-nhfb, classification = "C1180 (Optimisation techniques); C6110P (Parallel programming); C6150N (Distributed systems software); C7330 (Biology and medical computing); C7430 (Computer engineering)", corpsource = "Dept. of Comput. 
Sci., Georgia Univ., Athens, GA, USA", fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", keywords = "Aspergillus nidulans; biology computing; central computational problem; Chromosome IV; chromosome reconstruction; classical NP complete optimal linear arrangement problem; clonal data; clone ordering; genetics; genomic library; heterogeneous collection; Markov chain decomposition; microcanonical annealing; networked computers; parallel algorithms; Parallel Virtual Machine; physical maps; PVM system; simulated annealing; single monolithic parallel computing resource; software system; Unix workstations; virtual machines; workstation cluster", pubcountry = "Netherlands", treatment = "P Practical", } @Article{Blackford:1997:PEN, author = "L. S. Blackford and A. Cleary and A. Petitet and R. C. Whaley and J. Demmel and I. Dhillon and H. Ren and K. Stanley and J. Dongarra and S. Hammarling", title = "Practical Experience in the Numerical Dangers of Heterogeneous Computing", journal = j-TOMS, volume = "23", number = "2", pages = "133--147", month = jun, year = "1997", CODEN = "ACMSCU", DOI = "https://doi.org/10.1145/264029.264030", ISSN = "0098-3500 (print), 1557-7295 (electronic)", ISSN-L = "0098-3500", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.acm.org/pubs/citations/journals/toms/1997-23-2/p133-blackford/", abstract = "Special challenges exist in writing reliable numerical library software for heterogeneous computing environments. Although a lot of software for distributed-memory parallel computers has been written, porting this software to a network of workstations requires careful consideration. The symptoms of heterogeneous computing failures can range from erroneous results without warning to deadlock. Some of the problems are straightforward to solve, but for others the solutions are not so obvious, or incur an unacceptable overhead. 
Making software robust on heterogeneous systems often requires additional communication. We describe and illustrate the problems encountered during the development of ScaLAPACK and the NAG Numerical PVM Library. Where possible, we suggest ways to avoid potential pitfalls, or if that is not possible, we recommend that the software not be used on heterogeneous networks.", acknowledgement = ack-rfb # " and " # ack-kr, fjournal = "ACM Transactions on Mathematical Software", journal-URL = "http://dl.acm.org/pub.cfm?id=J782", keywords = "distributed-memory systems, floating-point arithmetic, heterogeneous processor networks, message passing, numerical software, reliability", subject = "{\bf D.1.3} Software, PROGRAMMING TECHNIQUES, Concurrent Programming, Distributed programming. {\bf G.1.0} Mathematics of Computing, NUMERICAL ANALYSIS, General, Computer arithmetic. {\bf G.1.0} Mathematics of Computing, NUMERICAL ANALYSIS, General, Parallel algorithms.", } @Article{Blaheta:1997:PIP, author = "R. Blaheta and O. Jakl and J. Stary", title = "{PVM}-Implementation of the {PCG} Method with Displacement Decomposition", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "321--328", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Bozas:1997:PED, author = "G. Bozas and M. Fleischhauer and S. 
Zimmermann", title = "{PVM} Experiences in Developing the {MIDAS} Parallel Database System", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "427--434", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Bramley:1997:TNR, author = "Randall Bramley", title = "Technology News \& Reviews: {Chemkin} software; {OpenMP Fortran Standard}; {ODE} Toolbox for {Matlab}; {Java} products; {Scientific WorkPlace 3.0}", journal = j-IEEE-COMPUT-SCI-ENG, volume = "4", number = "4", pages = "75--78", month = oct # "\slash " # dec, year = "1997", CODEN = "ISCEE4", ISSN = "1070-9924 (print), 1558-190X (electronic)", ISSN-L = "1070-9924", bibdate = "Sat Jan 9 08:57:23 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/fortran3.bib; https://www.math.utah.edu/pub/tex/bib/ieeecomputscieng.bib; https://www.math.utah.edu/pub/tex/bib/java.bib; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://dlib.computer.org/cs/books/cs1997/pdf/c4075.pdf", acknowledgement = ack-nhfb, fjournal = "IEEE Computational Science \& Engineering", journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=99", } @Article{Bruck:1997:EMP, author = "Jehoshua Bruck and Danny Dolev and Ching-Tien Ho and Marcel-C{\u{a}}t{\u{a}}lin Ro{\c{s}}u and Ray Strong", title = "Efficient Message Passing Interface ({MPI}) for Parallel Computing on Clusters of Workstations", journal = j-J-PAR-DIST-COMP, volume = "40", number = "1", pages = "19--34", day = "10", month = jan, year = "1997", CODEN = "JPDCER", DOI = "https://doi.org/10.1006/jpdc.1996.1267", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Thu Mar 9 09:19:01 MST 2000", bibsource = 
"http://www.idealibrary.com/servlet/useragent?func=showAllIssues&curIssueID=jpdc; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.idealibrary.com/links/doi/10.1006/jpdc.1996.1267/production; http://www.idealibrary.com/links/doi/10.1006/jpdc.1996.1267/production/pdf; http://www.idealibrary.com/links/doi/10.1006/jpdc.1996.1267/production/ref", acknowledgement = ack-nhfb, classification = "C5220P (Parallel architecture); C5440 (Multiprocessing systems); C5620L (Local area networks); C6110P (Parallel programming); C6115 (Programming support)", corpsource = "California Inst. of Technol., Pasadena, CA, USA", fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", keywords = "clusters of; collective communication functionality; industrial; level reliable transport protocol; local area networks; message passing; message passing interface; MPI-CCL layer; parallel computing; parallel programming; parallel programming environments; point-to-point communication; programming environments; standard; standards; user-; workstations", treatment = "A Application; P Practical", } @Article{Brune:1997:HMP, author = "Matthias Brune and J{\"o}rn Gehring and Alexander Reinefeld", title = "Heterogeneous Message Passing and a Link to Resource Management", journal = j-J-SUPERCOMPUTING, volume = "11", number = "4", pages = "355--369", month = dec, year = "1997", CODEN = "JOSUED", DOI = "https://doi.org/10.1023/A:1007966723231", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Wed Jul 6 12:13:07 MDT 2005", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=11&issue=4; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.wkap.nl/issuetoc.htm/0920-8542+11+4+1997", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=11&issue=4&spage=355; http://www.wkap.nl/oasis.htm/147011", acknowledgement = 
ack-nhfb, classification = "C5640 (Protocols); C6150J (Operating systems); C6150N (Distributed systems software)", corpsource = "Paderborn Univ., Germany", fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", keywords = "communication protocols; computer resource management; message passing; MPI; parallel process communication; portability; process mapping; processor scheduling; protocols; PVM; resource allocation; resource management", pubcountry = "Netherlands", treatment = "T Theoretical or Mathematical", } @Article{Bubak:1997:EPA, author = "M. Bubak and W. Funika and J. Moscinski", title = "Evaluation of Parallel Application's Behavior in Message Passing Environment", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "234--241", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Carvalho:1997:PCC, author = "L. M. R. Carvalho and J. M. L. M. Palma", title = "Parallelization of a {CFD} Code Using {PVM} and Domain Decomposition Techniques", journal = j-LECT-NOTES-COMP-SCI, volume = "1215", pages = "247--??", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Aug 22 11:59:49 MDT 1997", bibsource = "ftp://ftp.math.utah.edu/pub/bibnet/subjects/domain-decomp.bib; https://www.math.utah.edu/pub/tex/bib/lncs1997a.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Ciegis:1997:NID, author = "R. Ciegis and R. Sablinskas and J. 
Wasniewski", title = "Numerical Integration on Distributed-Memory Parallel Systems", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "329--336", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Clematis:1997:DNL, author = "A. Clematis and A. Coda and M. Spagnuolo and M. Mineter", title = "Developing Non-Local Iterative Parallel Algorithms for {GIS} on {Cray T3D} Using {MPI}", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "435--442", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Cotronis:1997:MPP, author = "J. Y. Cotronis", title = "Message-Passing Program Development by Ensemble", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "242--249", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Demuynck:1997:DOD, author = "K. Demuynck and J. Broeckhove and F. 
Arickx", title = "Dynamic Optimization of a Distributed {VR} System by Network-Balancing", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "443--450", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Derakhshan:1997:PEP, author = "M. Derakhshan and S. Hammarling and A. Krommer", title = "{PINEAPL}: a {European} Project on {Parallel Industrial Numerical Applications and Portable Libraries}", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "337--342", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{DiMartino:1997:IPD, author = "B. {Di Martino} and A. Mazzeo and N. Mazzocca and U. Villano", title = "Interaction Patterns Detection in {PVM} Programs to Support Simulation", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "250--256", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{DiMartino:1997:MDH, author = "V. {Di Martino} and G. 
Ruocco", title = "Molecular Dynamics on Hybrid Memory Machines", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "451--456", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{DiNapoli:1997:DCA, author = "C. {Di Napoli} and M. Giordano and M. M. Furnari", title = "Distributed and Cooperative Applications in {PVM}", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "83--90", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Dongarra:1997:BCA, author = "J. J. Dongarra and F. Desprez and A. Petitet and C. Randriamaro", title = "Block-Cyclic Array Redistribution on Networks of Workstations", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "343--350", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @InProceedings{Dongarra:1997:CSD, author = "J. J. Dongarra and S. Hammarling and A. 
Petitet", title = "Case studies on the development of {ScaLAPACK} and the {NAG} Numerical {PVM} Library", crossref = "Boisvert:1997:QNS", pages = "236--248", year = "1997", bibdate = "Thu Sep 16 09:48:36 MDT 1999", bibsource = "https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.netlib.org/utk/papers/woco96/woco96.html; http://www.netlib.org/utk/papers/woco96/woco96.ps; http://www.netlib.org/utk/people/JackDongarra/pdf/woco96.pdf", acknowledgement = ack-nhfb, } @InProceedings{Dou:1997:ISV, author = "Yong Dou and Zhengbing Pang and Xingming Zhou", title = "Implementing a software virtual shared memory on {PVM}", crossref = "IEEE:1997:APD", pages = "", year = "1997", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C6110P (Parallel programming); C6115 (Programming support); C6120 (File organisation); C6140D (High level languages); C7430 (Computer engineering)", corpsource = "Dept. of Comput. Sci., Changsha Inst. of Technol., Hunan, China", keywords = "distributed; FORTRAN; FORTRAN language; GKD-VSM; memory environments; multithread scheme; parallel programming; parallel programming model; Prefetch and Poststore; programming environments; PVM; shared memory; software overhead; software virtual shared memory; synchronisation; user-level; virtual machines; virtual storage", treatment = "P Practical", } @Article{Exbrayat:1997:OPS, author = "M. Exbrayat and H. 
Kosch", title = "Offering Parallelism to a Sequential Database Management System on a Network of Workstations Using {PVM}", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "457--462", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Fachat:1997:IEB, author = "Andr{\'e} Fachat and Karl Heinz Hoffmann", title = "Implementation of {Ensemble-Based Simulated Annealing} with dynamic load balancing under {MPI}", journal = j-COMP-PHYS-COMM, volume = "107", number = "1--3", pages = "49--53", month = dec, year = "1997", CODEN = "CPHCBZ", DOI = "https://doi.org/10.1016/S0010-4655(97)00096-9", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Mon Feb 13 21:30:21 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm1990.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465597000969", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @TechReport{Fagg:1997:HMAa, author = "G. Fagg and J. Dongarra and A. Geist", title = "Heterogeneous {MPI} Application Interoperation and Process Management under {PVMPI}", type = "Technical report", number = "CS-97-???", institution = inst-UTK, address = inst-UTK:adr, month = jun, year = "1997", bibdate = "Tue Feb 26 10:10:44 2002", bibsource = "https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.netlib.org/utk/papers/pvmmpi97.ps; http://www.netlib.org/utk/people/JackDongarra/pdf/pvmmpi97.pdf", acknowledgement = ack-nhfb, } @Article{Fagg:1997:HMAb, author = "G. E. Fagg and J. J. Dongarra and A.
Geist", title = "Heterogeneous {MPI} Application Interoperation and Process Management under {PVMPI}", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "91--98", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Fang:1997:MDD, author = "Niandong Fang and Helmar Burkhart", title = "{MPI-DDL}: a distributed-data library for {MPI}", journal = j-FUT-GEN-COMP-SYS, volume = "12", number = "5", pages = "407--419", day = "1", month = apr, year = "1997", CODEN = "FGSEVI", ISSN = "0167-739X (print), 1872-7115 (electronic)", ISSN-L = "0167-739X", bibdate = "Wed Feb 27 12:41:16 MST 2002", bibsource = "http://www.elsevier.com/locate/issn/0167739X; https://www.math.utah.edu/pub/tex/bib/futgencompsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.elsevier.com/gej-ng/10/19/19/27/17/23/abstract.html", acknowledgement = ack-nhfb, fjournal = "Future Generation Computer Systems", journal-URL = "http://www.sciencedirect.com/science/journal/0167739X", } @TechReport{Fin:1997:CPM, author = "Torsten Fin", title = "Comparing the performance of {MPI}, {PVM}, and {CORBA} on {Ethernet LANs}", type = "{Berichte zur Rechnerarchitektur}", number = "3(4)", institution = "Institut f{\"u}r Informatik, Lehrstuhl f{\"u}r Rechnerarchitektur und -kommunikation, Friedrich-Schiller-Universit{\"a}t Jena", address = "Jena, Germany", pages = "12", year = "1997", bibdate = "Wed Aug 27 06:51:17 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Fischer:1997:AAP, author = "Markus Fischer and Jack Dongarra", editor = "????", booktitle = "{Concurrent Computing Conference, Atlanta, GA, March 10--11, 1994}", title = "Another Architecture: {PVM} on {Windows 95\slash NT}", publisher 
= "????", address = "????", pages = "??--??", year = "1997", bibdate = "Tue Feb 26 10:10:44 2002", bibsource = "https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.netlib.org/utk/people/JackDongarra/PAPERS/nt-paper.ps; http://www.netlib.org/utk/people/JackDongarra/pdf/nt-paper.pdf", acknowledgement = ack-nhfb, } @Article{Fischer:1997:ESP, author = "M. Fischer and J. Simon", title = "Embedding {SCI} into {PVM}", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "177--184", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Foster:1997:MMC, author = "Ian Foster and Jonathan Geisler and Carl Kesselman and Steven Tuecke", title = "Managing Multiple Communication Methods in High-Performance Networked Computing Systems", journal = j-J-PAR-DIST-COMP, volume = "40", number = "1", pages = "35--48", day = "10", month = jan, year = "1997", CODEN = "JPDCER", DOI = "https://doi.org/10.1006/jpdc.1996.1266", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Thu Mar 9 09:19:01 MST 2000", bibsource = "http://www.idealibrary.com/servlet/useragent?func=showAllIssues&curIssueID=jpdc; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.idealibrary.com/links/doi/10.1006/jpdc.1996.1266/production; http://www.idealibrary.com/links/doi/10.1006/jpdc.1996.1266/production/pdf; http://www.idealibrary.com/links/doi/10.1006/jpdc.1996.1266/production/ref", acknowledgement = ack-nhfb, classification = "B6150M (Protocols); B6210L (Computer communications); C5440 (Multiprocessing systems); C5470 (Performance evaluation and testing); C5640 (Protocols); C5670 (Network performance)", corpsource = "Div. of Math. and Comput. 
Sci., Argonne Nat. Lab., IL, USA", fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", keywords = "Argonne MPICH library; computer networks; computing systems; criteria; heterogeneous networked environment; high-performance networked; message passing; message passing interface; multimethod communication; multiple communication methods; multithreaded runtime system; networked computing environments; Nexus; Nexus-based MPI implementation; performance characteristics; performance evaluation; protocols; remote service request mechanisms; transport mechanisms; user-specified selection", treatment = "P Practical", } @TechReport{Gabriel:1997:EMU, author = "Edgar Gabriel and Thomas Beisel and Michael Resch", title = "{Erweiterung einer MPI-Umgebung zur Interoperabilit{\"a}t verteilter MPP-Systeme}. ({German}) [{Extension} of an {MPI} environment for interoperability with distributed {MPP} systems]", type = "{Studienarbeit angewandte Informatik}", number = "RUS 37", institution = "Rechenzentrum Universit{\"a}t Stuttgart", address = "Stuttgart, Germany", year = "1997", bibdate = "Wed Aug 27 06:55:46 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, language = "German", } @Article{Galaktionov:1997:MST, author = "A. S. Galaktionov and P. D. Anderson and G. W. M. Peters", title = "Mixing Simulations: Tracking Strongly Deforming Fluid Volumes in {3D} Flows", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "463--469", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Galibert:1997:YCL, author = "O.
Galibert", title = "{YLC}, {A C++ Linda} System on Top of {PVM}", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "99--106", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{GarciaSalcines:1997:PRR, author = "E. {Garcia Salcines} and G. {Cerruela Garcia} and J. I. {Benavides Benitez} and F. {Mu{\~n}oz Garcia}", title = "Parallel Rendering of Radiance on Distributed Memory System by {PVM}", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "502--507", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Geist:1997:ACP, author = "G. A. Geist", title = "Advanced Capabilities in {PVM 3.4}", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "107--115", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Geist:1997:BPW, author = "G. A. Geist and J. A. Kohl and P. M. Papadopoulos and S. L. 
Scott", title = "Beyond {PVM 3.4}: What We've Learned, What's Next, and Why", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "116--126", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Geist:1997:CPF, author = "G. A. {Geist, II} and James Arthur Kohl and Philip M. Papadopoulos", title = "{CUMULVS}: Providing Fault Tolerance, Visualization, and Steering of Parallel Applications", journal = j-IJSAHPC, volume = "11", number = "3", pages = "224--235", month = "Fall", year = "1997", CODEN = "IJSCFG", ISSN = "1078-3482", bibdate = "Wed Jul 23 11:38:50 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "International Journal of Supercomputer Applications and High Performance Computing", } @Article{Gerlach:1997:ECS, author = "J. Gerlach and M. Sato and Y. 
Ishikawa", title = "Experiences with the {C++} Standard Template Library and {MPI} for a Parallel Particle Simulation Method", journal = j-LECT-NOTES-COMP-SCI, volume = "1225", pages = "961--??", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Aug 22 11:59:49 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Gillett:1997:UMC, author = "Richard Gillett and Richard Kaufmann", title = "Using the {Memory Channel Network} --- Using a cluster of standard {PCI-based} servers with a low-cost network to improve communication performance", journal = j-IEEE-MICRO, volume = "17", number = "1", pages = "19--25", month = jan # "\slash " # feb, year = "1997", CODEN = "IEMIDZ", DOI = "https://doi.org/10.1109/40.566189", ISSN = "0272-1732 (print), 1937-4143 (electronic)", ISSN-L = "0272-1732", bibdate = "Mon Apr 7 14:39:59 MDT 1997", bibsource = "Compendex database; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Digital Equipment Corp", affiliationaddress = "MA, USA", classcodes = "C5610N (Network interfaces); C5620 (Computer networks and techniques)", classification = "716.1; 722.1; 722.3; 722.4; 723.1; 723.2", corpsource = "Digital Equip. 
Corp., USA", fjournal = "IEEE Micro", journal-URL = "http://www.computer.org/csdl/mags/mi/index.html", journalabr = "IEEE Micro", keywords = "Bandwidth; clusters; Coding errors; Communication channels (information theory); Computer networks; computer networks; Computer software; Data communication systems; Data handling; Data storage equipment; Data transfer; DEC computers; Digital; Latency; Lock acquisition; Lock release; Memory channel; Memory Channel; Memory Channel Network; Message passing; Message size; message-passing; network for; network interfaces; Parallel processing systems; PCI bus; Performance; Raw message passing; Storage allocation (computer); Universal message passing; UNIX", treatment = "P Practical", } @Article{Goumopoulos:1997:PCS, author = "C. Goumopoulos and E. Housos and O. Liljenzin", title = "Parallel Crew Scheduling on Workstation Networks Using {PVM}", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "470--477", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @TechReport{Grabowsky:1997:MBK, author = "Lothar Grabowsky", title = "{MPI-basierte Koppelrandkommunikation und Einflu{\ss} der Partitionierung im 3D-Fall}. ({German}) [{MPI}-based coupled edge communication and influence of partitioning in the {3D} case]", type = "Preprint-Reihe des Chemnitzer SFB 393", number = "97,17", institution = "Universit{\"a}t Chemnitz-Zwickau", address = "Chemnitz, Germany", pages = "13", year = "1997", bibdate = "Wed Aug 27 06:53:21 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, language = "German", } @Article{Grecki:1997:MPE, author = "M. Grecki and G. Jablonski and A. 
Napieralski", title = "{MOPS} --- Parallel Environment for Simulation of Electronic Circuits Using Physical Models of Semiconductor Devices", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "478--485", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Gropp:1997:HPM, author = "William Gropp and Ewing Lusk", title = "A high-performance {MPI} implementation on a shared-memory vector supercomputer", journal = j-PARALLEL-COMPUTING, volume = "22", number = "11", pages = "1513--1526", day = "26", month = jan, year = "1997", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Fri Aug 6 10:14:43 MDT 1999", bibsource = "Compendex database; http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_free/browse/browse.cgi?year=1997&volume=22&issue=11; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_sub/browse/browse.cgi?year=1997&volume=22&issue=11&aid=1113", acknowledgement = ack-nhfb, affiliation = "Argonne Natl Lab", affiliationaddress = "IL", classification = "722.1; 722.2; 722.4; 921.1; C5220P (Parallel architecture); C5610N (Network interfaces); C6150N (Distributed systems software)", corpsource = "Div. of Math. and Comput. Sci., Argonne Nat. 
Lab., IL, USA", fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", journalabr = "Parallel Comput", keywords = "Data storage equipment; Interfaces (computer); message passing; Message passing interface (mpi); Message-Passing Interface; MPI implementation; MPIC; NEC SX-4; network interfaces; parallel; Parallel processing systems; Shared memory multiprocessors; shared memory systems; shared-memory programming; shared-memory vector supercomputer; standards; supercomputer; Supercomputers; Vectors", treatment = "P Practical", } @Article{Gropp:1997:SMC, author = "W. Gropp and E. Lusk", title = "Sowing {MPICH}: a Case Study in the Dissemination of a Portable Environment for Parallel Scientific Computing", journal = j-IJSAHPC, volume = "11", number = "2", pages = "103--114", month = "Summer", year = "1997", CODEN = "IJSCFG", ISSN = "1078-3482", bibdate = "Thu Jun 26 18:17:48 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "International Journal of Supercomputer Applications and High Performance Computing", } @Article{Gropp:1997:WPM, author = "W. Gropp and E. 
Lusk", title = "Why Are {PVM} and {MPI} So Different?", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "3--10", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @InProceedings{Guan:1997:PDI, author = "Huiwei Guan and Chi-kwong Li and To-yat Cheung and Songnian Yu", title = "Parallel design and implementation of {SOM} neural computing model in {PVM} environment of a distributed system", crossref = "IEEE:1997:APD", pages = "26--31", year = "1997", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C1230D (Neural nets); C5220P (Parallel architecture); C5290 (Neural computing techniques)", conflocation = "Shanghai, China; 19-21 March 1997", conftitle = "Proceedings. Advances in Parallel and Distributed Computing", corpsource = "Dept. of Comput. Sci., City Univ. of Hong Kong, Hong Kong", keywords = "architectures; distributed; machines; message passing; neural net architecture; parallel; parallel virtual machine; PVM environment; self-organising feature maps; SOM neural computing model; system; virtual", treatment = "T Theoretical or Mathematical", } @Article{Hempel:1997:IMN, author = "R. Hempel and H. Ritzdorf and F. Zimmermann", title = "Implementation of {MPI} on {NEC}'s {SX-4} Multi-Node Architecture", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "185--193", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Hoyos-Rivera:1997:UPB, author = "G. J. Hoyos-Rivera and V. G. 
Sanchez-Arias", title = "Using {PVM} to Build an Interface to Support Cooperative Work in a Distributed Systems Environment", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "127--134", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @InProceedings{Hwang:1997:EMC, author = "Kai Hwang and Choming Wang and Cho-Li Wang", title = "Evaluating {MPI} collective communication on the {SP2}, {T3D}, and {Paragon} multicomputers", crossref = "IEEE:1997:TIS", pages = "106--115", year = "1997", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C5220P (Parallel architecture); C5440 (Multiprocessing systems); C5470 (Performance evaluation and testing)", conftitle = "Proceedings Third International Symposium on High-Performance Computer Architecture", corpsource = "Hong Kong Univ., Hong Kong", keywords = "architectural support; closed-form expressions; Cray T3D; IBM SP2; Intel Paragon; message passing; MPI collective communication evaluation; multiprocessing systems; Paragon multicomputers; performance evaluation; STAP benchmark experiments; startup latency; synchronisation; timing; timing performance", sponsororg = "IEEE Computer. Soc. Tech. Committee on Comput. Archit", treatment = "P Practical", } @Article{Jabbarzadeh:1997:PSS, author = "A. Jabbarzadeh and J. D. Atkinson and R. I. 
Tanner", title = "Parallel simulation of shear flow of polymers between structured walls by molecular dynamics simulation on {PVM}", journal = j-COMP-PHYS-COMM, volume = "107", number = "1--3", pages = "123--136", month = dec, year = "1997", CODEN = "CPHCBZ", DOI = "https://doi.org/10.1016/S0010-4655(97)00088-X", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Mon Feb 13 21:30:21 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm1990.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S001046559700088X", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Jackson:1997:SYE, author = "D. J. Jackson and C. W. Humphres", title = "A simple yet effective load balancing extension to the {PVM} software system", journal = j-PARALLEL-COMPUTING, volume = "22", number = "12", pages = "1647--1660", day = "21", month = feb, year = "1997", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "Compendex database; http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_free/browse/browse.cgi?year=1997&volume=22&issue=12; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_sub/browse/browse.cgi?year=1997&volume=22&issue=12&aid=1112", acknowledgement = ack-nhfb, classification = "C4140 (Linear algebra); C5260B (Computer vision and image processing techniques); C6110P (Parallel programming); C6150E (General utility programs); C6150N (Distributed systems software)", corpsource = "Dept. of Electr. 
Eng., Alabama Univ., Tuscaloosa, AL, USA", fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", keywords = "algorithms; application program interfaces; coding; data compression; host CPU load information; image; information gathering; load; load balancing; load balancing extension; master process; matrix algebra; matrix oriented; NAS parallel benchmarks; parallel; parallel algorithms; parallel fractal image compression algorithm; parallel runtime performance; process spawn; processes; programming; programming interface; PVM software system; resource allocation; slave", pubcountry = "Netherlands", treatment = "P Practical", } @Article{Kacsuk:1997:GDD, author = "Peter Kacsuk and Jose C. Cunha and Gabor Dozsa and Joao Lourenco and Tibor Fadgyas and Tiago Antao", title = "A graphical development and debugging environment for parallel programs", journal = j-PARALLEL-COMPUTING, volume = "22", number = "13", pages = "1747--1770", day = "28", month = feb, year = "1997", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Tue Oct 21 15:14:48 MDT 1997", bibsource = "Compendex database; http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_free/browse/browse.cgi?year=1997&volume=22&issue=13; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_sub/browse/browse.cgi?year=1997&volume=22&issue=13&aid=1126", acknowledgement = ack-nhfb, affiliation = "Hungarian Acad of Sciences", affiliationaddress = "Budapest, Hung", classification = "722.2; 722.4; 723.1; 723.1.1; 723.5; C6110P (Parallel programming); C6110V (Visual programming); C6115 (Programming support); C6150G (Diagnostic, testing, debugging and evaluating systems); C6180G (Graphical user interfaces)", conference = "Proceedings of the 1996 Workshop on Distributed and Parallel Systems, DAPSYS", corpsource = "KFKI-MSZKI Res. Inst. for Meas. and Comput. 
Tech., Hungarian Acad. of Sci., Budapest, Hungary", fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", journalabr = "Parallel Comput", keywords = "abstraction mechanism; complex programming environment; Computer aided software engineering; Computer programming; Computer programming languages; data visualization; debugging; debugging engine; debugging environment; distributed; Distributed computer systems; Distributed debugging engine; distributed memory computer architectures; GRADE; graphical; graphical development; graphical user interface; Graphical user interfaces; graphical user interfaces; GRAPNEL; high-level graphical support; language; languages; machine; message-; parallel; Parallel processing systems; parallel programming; Parallel programs; parallel virtual; Parallel virtual machine; passing parallel programs; performance monitoring; program; Program debugging; programming environments; programs; PROVE; Software Package grade; Software Package grapnel; software tools; Tape/PVM; visual", meetingaddress = "Miskolc, Hung", meetingdate = "Oct 1996", meetingdate2 = "10/96", treatment = "A Application; P Practical", } @Article{Kitowski:1997:CPM, author = "J. Kitowski and K. Boryczko and J. Moscinski", title = "Comparison of {PVM} and {MPI} Performance in Short-Range Molecular Dynamics Simulation", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "11--16", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Konuru:1997:MUL, author = "Ravi B. Konuru and Steve W. 
Otto and Jonathan Walpole", title = "A Migratable User-Level Process Package for {PVM}", journal = j-J-PAR-DIST-COMP, volume = "40", number = "1", pages = "81--102", day = "10", month = jan, year = "1997", CODEN = "JPDCER", DOI = "https://doi.org/10.1006/jpdc.1996.1270", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Thu Mar 9 09:19:01 MST 2000", bibsource = "http://www.idealibrary.com/servlet/useragent?func=showAllIssues&curIssueID=jpdc; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.idealibrary.com/links/doi/10.1006/jpdc.1996.1270/production; http://www.idealibrary.com/links/doi/10.1006/jpdc.1996.1270/production/pdf; http://www.idealibrary.com/links/doi/10.1006/jpdc.1996.1270/production/ref", acknowledgement = ack-nhfb, classification = "C4240C (Computational complexity); C5440 (Multiprocessing systems); C6110P (Parallel programming); C6115 (Programming support); C6150G (Diagnostic, testing, debugging and evaluating systems); C7430 (Computer engineering)", corpsource = "IBM Thomas J. Watson Res. Center, Yorktown Heights, NY, USA", fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", keywords = "adaptive load distribution; application debugging; application transparency; application-transparent migration; availability; based programming model; computational complexity; computing; distributed memory multiprocessor; dynamic environment; dynamic multiprocessor environment; machine; machines; message passing; message-; microbenchmarks; migratable user-level process package; parallel; parallel programming; parallel virtual; program debugging; PVM; system load; unobtrusive; unpredictable variability; user-level process; virtual; virtual processor; workstation; workstation networks; workstation ownership", treatment = "A Application; P Practical", } @Article{Kormicki:1997:PLS, author = "Maciek Kormicki and Ausif Mahmood and Bradley S. 
Carlson", title = "Parallel logic simulation on a network of workstations using parallel virtual machine", journal = j-TODAES, volume = "2", number = "2", pages = "123--134", month = jan, year = "1997", CODEN = "ATASFO", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Jul 27 10:05:33 MDT 2001", bibsource = "http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.acm.org/pubs/articles/journals/todaes/1997-2-2/p123-kormicki/p123-kormicki.pdf; http://www.acm.org/pubs/citations/journals/todaes/1997-2-2/p123-kormicki/", abstract = "This paper explores parallel logic simulation on a network of workstations using a parallel virtual machine (PVM). A novel parallel implementation of the centralized-time event-driven logic simulation algorithm is carried out such that no global controlling workstation is needed to synchronize the advance of simulation time. Further advantages of our new approach include a random partitioning of the circuit onto available workstations and a pipelined execution of the different phases of the simulation algorithm. To achieve a better load balance, we employ a semioptimistic scheme for gate evaluations (in conjunction with a centralized-time algorithm) such that no rollback is required. The performance of this implementation has been evaluated using the ISCAS benchmark circuits. Speedups improve with the size of the circuit and the activity level in the circuit. 
Analyses of the communication overhead show that the techniques developed here will yield even higher gains as newer networking technologies like ATM are employed to connect workstations.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", generalterms = "Algorithms; Performance; Verification", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "distributed computing; parallel logic simulation; PVM; synchronous simulation", subject = "Hardware --- Logic Design --- Design Aids (B.6.3): {\bf Simulation}; Hardware --- Integrated Circuits --- Design Aids (B.7.2): {\bf Simulation}", } @Article{Krantz:1997:CSC, author = "A. T. Krantz and V. S. Sunderam", title = "Client Server Computing on Message Passing Systems: Experiences with {PVM-RPC}", journal = j-LECT-NOTES-COMP-SCI, volume = "1300", pages = "110--??", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Apr 28 08:51:33 MDT 1998", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Krotz-Vogel:1997:PPP, author = "W. Krotz-Vogel and H.-C. 
Hoppe", title = "The {PALLAS} Parallel Programming Environment", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "257--266", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Lauria:1997:MFH, author = "Mario Lauria and Andrew Chien", title = "{MPI-FM}: High Performance {MPI} on Workstation Clusters", journal = j-J-PAR-DIST-COMP, volume = "40", number = "1", pages = "4--18", day = "10", month = jan, year = "1997", CODEN = "JPDCER", DOI = "https://doi.org/10.1006/jpdc.1996.1264", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Thu Mar 9 09:19:01 MST 2000", bibsource = "http://www.idealibrary.com/servlet/useragent?func=showAllIssues&curIssueID=jpdc; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.idealibrary.com/links/doi/10.1006/jpdc.1996.1264/production; http://www.idealibrary.com/links/doi/10.1006/jpdc.1996.1264/production/pdf; http://www.idealibrary.com/links/doi/10.1006/jpdc.1996.1264/production/ref", acknowledgement = ack-nhfb, classification = "B6150M (Protocols); B6210L (Computer communications); C5440 (Multiprocessing systems); C5470 (Performance evaluation and testing); C5620L (Local area networks); C5640 (Protocols); C5670 (Network performance)", corpsource = "Dipartimento di Inf. 
e Sistemistica, Naples Univ., Italy", fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", keywords = "application level; bandwidth; communication layers; communication performance; communication pipeline stages; communication software; Cray; Cray T3D; design solutions; evaluation; fast messages library; hardware performance; high; high level messaging library; high performance MPI; high speed LANs; IBM SP2; interface; latency; level messaging layer; local area networks; low; low level; low level communication layers; low level messaging layer; message passing; message passing interface; minimum; minimum one-way latency; MPI-FM; Myrinet network; one-way latency; performance; performance evaluation; protocols; SPARCstation 20 workstations; speed LANs; T3D; workstation clusters; workstations", treatment = "A Application; P Practical", } @InProceedings{Li:1997:EHC, author = "Konming Gary Li and Nabil M. Zamel", title = "An Evaluation of {HPF} Compilers and the Implementation of a Parallel Linear Equation Solver Using {HPF} and {MPI}", crossref = "ACM:1997:SHP", pages = "??--??", year = "1997", bibdate = "Sat Mar 21 08:51:09 1998", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.supercomp.org/sc97/proceedings/TECH/LI/INDEX.HTM", acknowledgement = ack-nhfb, } @Article{Li:1997:PIO, author = "Wei Li and Xiaohu Huang and Nanning Zheng", title = "Parallel implementing {OpenGL} on {PVM}", journal = j-PARALLEL-COMPUTING, volume = "23", number = "12", pages = "1839--1850", day = "15", month = dec, year = "1997", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Fri Aug 6 10:15:16 MDT 1999", bibsource = "Compendex database; http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_free/browse/browse.cgi?year=1997&volume=23&issue=12; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = 
"http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_sub/browse/browse.cgi?year=1997&volume=23&issue=12&aid=1248", acknowledgement = ack-nhfb, affiliation = "Xi'an Jiaotong Univ", affiliationaddress = "Xi'an, China", classification = "722.4; 723.2", fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", journalabr = "Parallel Comput", keywords = "Algorithms; Data decomposition; Image processing; Load balancing; Parallel processing systems; Parallel virtual machine; Task granularity; Three dimensional; Virtual reality", } @Article{Lu:1997:QPD, author = "Honghui Lu and Sandhya Dwarkadas and Alan L. Cox and Willy Zwaenepoel", title = "Quantifying the Performance Differences between {PVM} and {TreadMarks}", journal = j-J-PAR-DIST-COMP, volume = "43", number = "2", pages = "65--78", day = "15", month = jun, year = "1997", CODEN = "JPDCER", DOI = "https://doi.org/10.1006/jpdc.1997.1332", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Thu Mar 9 09:19:03 MST 2000", bibsource = "http://www.idealibrary.com/servlet/useragent?func=showAllIssues&curIssueID=jpdc; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.idealibrary.com/links/doi/10.1006/jpdc.1997.1332/production; http://www.idealibrary.com/links/doi/10.1006/jpdc.1997.1332/production/pdf; http://www.idealibrary.com/links/doi/10.1006/jpdc.1997.1332/production/ref", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Article{Ludwig:1997:OUI, author = "T. Ludwig and R. 
Wismueller", title = "{OMIS 2.0} --- a Universal Interface for Monitoring Systems", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "267--276", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Luecke:1997:HPF, author = "G. R. Luecke and J. J. Coyle", title = "{High Performance Fortran} versus explicit message passing on the {IBM SP-2} for the parallel {LU}, {QR}, and {Cholesky} factorizations", journal = j-SUPERCOMPUTER, volume = "13", number = "2", pages = "4--14", month = "????", year = "1997", CODEN = "SPCOEL", ISSN = "0168-7875", bibdate = "Wed Mar 18 08:37:01 MST 1998", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C4140 (Linear algebra); C5440 (Multiprocessing systems); C6110P (Parallel programming); C6150N (Distributed systems software); C7310 (Mathematics computing)", corpsource = "Iowa State Univ., Ames, IA, USA", fjournal = "Supercomputer", keywords = "BLACS; BLAS; Cholesky factorizations; ESSL library; explicit message passing; FORTRAN; High Performance Fortran; high-performance parallel implementations; IBM computers; IBM SP-2; LU factorizations; mathematics computing; matrix decomposition; message passing; MPI version; parallel computer; parallel languages; parallel machines; parallel programming; QR factorizations; SCALAPACK; software development; software libraries; software maintenance; software performance evaluation; Visual Numerics", pubcountry = "Netherlands", treatment = "P Practical", } @Article{Manegold:1997:QBM, author = "S. Manegold and F. Waas and D. 
Gudlat", title = "In Quest of the Bottleneck --- Monitoring Parallel Database Systems", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "277--284", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Mazzariol:1997:PCS, author = "M. Mazzariol and B. A. Gennart and V. Messerli and R. D. Hersch", title = "Performance of {CAP}-Specified Linear Algebra Algorithms", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "351--358", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{McDonald:1997:IPT, author = "Chris McDonald and Kamran Kazemi", title = "Improving the {PVM} teaching environment", journal = j-SIGCSE, volume = "29", number = "1", pages = "219--223", month = mar, year = "1997", CODEN = "SIGSD3", DOI = "https://doi.org/10.1145/268085.268167", ISSN = "0097-8418 (print), 2331-3927 (electronic)", ISSN-L = "0097-8418", bibdate = "Sat Nov 17 18:57:38 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigcse1990.bib", abstract = "The parallel programming community has long recognized the need for a simple programming environment offering interprocess communication between heterogeneous systems. As the Parallel Virtual Machine environment, PVM, has emerged to meet this goal, an increasing number of educational institutions are choosing PVM to support their teaching of parallel and distributed computing using networks of workstations. 
However, it is often the nature of PVM's design and implementation that can severely limit its success in a teaching environment. This paper first motivates and then describes improvements to the PVM environment which increase both robustness and efficiency in an educational setting.", acknowledgement = ack-nhfb, fjournal = "SIGCSE Bulletin (ACM Special Interest Group on Computer Science Education)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J688", } @Article{Mintchev:1997:TPM, author = "S. Mintchev and V. Getov", title = "Towards Portable Message Passing in {Java}: Binding {MPI}", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "135--142", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Mysliwiec:1997:CAM, author = "G. Mysliwiec and J. Sipowicz and R. Schaefer", title = "Control Activities in Message Passing Environment", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "143--150", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Mysliwiec:1997:IPS, author = "G. Mysliwiec and J. Sipowicz and H. 
Burkhart", title = "Implementing Parallel {SBS}-Type Linear Solvers Using {ALWAN}", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "359--366", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Overeinder:1997:BCD, author = "B. J. Overeinder and P. M. A. Sloot", title = "Breaking the Curse of Dynamics by Task Migration: Pilot Experiments in the {Polder Metacomputer}", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "194--207", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Book{Pacheco:1997:PPM, author = "Peter S. Pacheco", title = "Parallel programming with {MPI}", publisher = pub-MORGAN-KAUFMANN, address = pub-MORGAN-KAUFMANN:adr, pages = "xxii + 418", year = "1997", ISBN = "1-55860-339-5", ISBN-13 = "978-1-55860-339-4", LCCN = "QA76.642 .P3 1997", bibdate = "Fri Feb 04 17:32:19 2000", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Article{Peinado:1997:HPC, author = "M. Peinado and R. 
Venkatesan", title = "Highly Parallel Cryptographic Attacks", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "367--374", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Pernice:1997:BRM, author = "Michael Pernice", title = "Book Review: {{\em MPI: The Complete Reference}}", journal = j-IEEE-CONCURR, volume = "5", number = "1", pages = "80--81", month = jan # "\slash " # mar, year = "1997", CODEN = "IECMFX", DOI = "https://doi.org/10.1109/MCC.1997.580453", ISSN = "1092-3063 (print), 1558-0849 (electronic)", ISSN-L = "1092-3063", bibdate = "Tue Jan 16 06:49:26 2001", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://dlib.computer.org/pd/books/pd1997/pdf/p1080.pdf", acknowledgement = ack-nhfb, fjournal = "IEEE Concurrency", } @Article{Petcu:1997:ISM, author = "D. Petcu", title = "Implementation of Some Multiprocessor Algorithms for {ODEs} Using {PVM}", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "375--382", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Piernas:1997:APM, author = "J. Piernas and A. Flores and J. M. 
Garcia", title = "Analyzing the Performance of {MPI} in a Cluster of Workstations Based on {Fast Ethernet}", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "17--24", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Reinhard:1997:MHP, author = "E. Reinhard and A. Chalmers", title = "Message Handling in Parallel Radiance", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "486--493", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Resch:1997:CMP, author = "M. Resch and H. Berger and T. 
Boenisch", title = "A Comparison of {MPI} Performance on Different {MPPs}", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "25--32", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @TechReport{Resch:1997:PM, author = "Michael Resch and Thomas Beisel and Holger Berger", title = "{PACX-MPI}", type = "{BI: Informationen f{\"u}r Nutzer des Rechenzentrums}", number = "1997,11/12", institution = "Universit{\"a}t Stuttgart, Zentrale Universit{\"a}tseinrichtung", address = "Stuttgart, Germany", year = "1997", bibdate = "Wed Aug 27 07:18:18 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @TechReport{Resch:1997:PMC, author = "Michael Resch and Holger Berger and Thomas B{\"o}nisch", title = "Performance of {MPI} on a {Cray T3E-512}", type = "{BI: Informationen f{\"u}r Nutzer des Rechenzentrums}", number = "1997,5/6", institution = "Universit{\"a}t Stuttgart, Zentrale Universit{\"a}tseinrichtung", address = "Stuttgart, Germany", pages = "??", year = "1997", bibdate = "Wed Aug 27 07:14:37 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "Third European CRAY-SGI MPP Workshop.", acknowledgement = ack-nhfb, } @Article{Roda:1997:PPI, author = "J. L. Roda and C. Rodriguez and F. Almeida and D. 
Gonzalez-Morales", title = "Predicting the Performance of Injection Communication Patterns on {PVM}", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "33--40", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Rough:1997:PRD, author = "J. Rough and A. Goscinski and D. {De Paoli}", title = "{PVM} on the {RHODOS} Distributed Operating System", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "208--218", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Roy:1997:PNT, author = "R. Roy and Z. Stankovski", title = "Parallelization of Neutron Transport Solvers", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "494--501", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Santos:1997:ECP, author = "L. P. Santos and V. Castro and A. 
Proenca", title = "Evaluation of the Communication Performance on a Parallel Processing System", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "41--48", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Saphir:1997:SMI, author = "William Saphir", title = "A Survey of {MPI} Implementations", journal = "NHSE Review", volume = "2", number = "1", pages = "??--??", month = nov, year = "1997", bibdate = "Wed Jan 14 05:59:12 2004", bibsource = "http://www.crpc.rice.edu/NHSEreview/96-1.html; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, keywords = "National HPCC Software Exchange (NHSE); Rice University", } @Article{Serot:1997:EPF, author = "J. Serot", title = "Embodying Parallel Functional Skeletons: An Experimental Implementation on Top of {MPI}", journal = j-LECT-NOTES-COMP-SCI, volume = "1300", pages = "629--??", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Apr 28 08:51:33 MDT 1998", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Silva:1997:IPD, author = "Luis M. 
Silva and Joao Gabriel Silva and Simon Chapple", title = "Implementation and Performance of {DSMPI}", journal = j-SCI-PROG, volume = "6", number = "2", pages = "201--214", month = "Summer", year = "1997", CODEN = "SCIPEV", ISSN = "1058-9244 (print), 1875-919X (electronic)", ISSN-L = "1058-9244", bibdate = "Thu Mar 28 12:27:27 MST 2002", bibsource = "Compendex database; ftp://ftp.ira.uka.de/bibliography/Parallel/dsm.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Scientific Programming", journal-URL = "http://iospress.metapress.com/content/1058-9244", xxauthor = "L. M. Silva and S. Chapple and J. G. Silva", xxpages = "210--214", } @Article{Soch:1997:PGP, author = "M. Soch and P. Tvrdik and M. Volf", title = "Parallel Graph-Partitioning Using the Mob Heuristic", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "383--389", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Song:1997:ALL, author = "Jianjian Song and Heng Kek Choo and Kuok Ming Lee", title = "Application-level load migration and its implementation on top of {PVM}", journal = j-CPE, volume = "9", number = "1", pages = "1--19", month = jan, year = "1997", CODEN = "CPEXEI", ISSN = "1040-3108", ISSN-L = "1040-3108", bibdate = "Wed Apr 16 06:39:19 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C6150J (Operating systems); C6150N (Distributed systems software)", corpsource = "Nat. Supercomput. Res. Center, Nat. Univ. 
of Singapore, Singapore", fjournal = "Concurrency, practice and experience", keywords = "concurrency; load migration; location transparency; network operating systems; operating systems (computers); parallel processing; process migration; PVM; receive buffer; residual dependency; resource allocation; virtual machines", pubcountry = "UK", treatment = "P Practical", } @Article{Souza:1997:EPH, author = "P. S. Souza and L. J. Senger and M. J. Santana and R. C. Santana", title = "Evaluating Personal High Performance Computing with {PVM} on {Windows} and {LINUX} Environments", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "49--56", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Squyres:1997:DEM, author = "J. M. Squyres and B. Saphir and A. Lumsdaine", title = "The Design and Evolution of the {MPI-2 C++} Interface", journal = j-LECT-NOTES-COMP-SCI, volume = "1343", pages = "57--??", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Apr 28 08:51:33 MDT 1998", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Stellner:1997:LBB, author = "G. Stellner and J. Trinitis", title = "Load Balancing Based on Process Migration for {MPI}", journal = j-LECT-NOTES-COMP-SCI, volume = "1300", pages = "150--??", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Apr 28 08:51:33 MDT 1998", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Strietzel:1997:PTS, author = "M. 
Strietzel", title = "Parallel Turbulence Simulation: Resolving the Inertial Subrange of {Kolmogorov}'s Spectra", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "508--516", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Sunderam:1997:TAS, author = "V. Sunderam and B. Topol and S. Moyer and A. Krantz", title = "Tools and Auxiliary Subsystems in {PVM}", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "285--294", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Theodoropoulos:1997:GSP, author = "P. Theodoropoulos and P. Tsanakas and G. Papakonstantinou", title = "Global Semaphores in a Parallel Programming Environment", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "151--158", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Uminski:1997:EEP, author = "P. W. Uminski and M. R. Matuszek and H. 
Krawczyk",
  title =        "Experimental Evaluation of {PVM} Group Communication",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1332",
  pages =        "57--66",
  year =         "1997",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Thu Dec 9 06:27:54 MST 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}

@Book{vandeGeijn:1997:UPP,
  author =       "Robert A. {van de Geijn}",
  title =        "Using {PLAPACK}: Parallel Linear Algebra Package",
  publisher =    pub-MIT,
  address =      pub-MIT:adr,
  pages =        "xvii + 194",
  year =         "1997",
  ISBN =         "0-262-72026-4",
  ISBN-13 =      "978-0-262-72026-7",
  LCCN =         "QA185.D37 V36 1997",
  bibdate =      "Fri Dec 19 10:39:21 1997",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note =         "With contributions by Philip Alpatov and others.",
  price =        "US\$27.50",
  acknowledgement = ack-nhfb,
}

@Article{Vlassov:1997:SSM,
  author =       "V. Vlassov and L.-E. Thorelli",
  title =        "A Synchronizing Shared Memory: Model and Programming Implementation",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1332",
  pages =        "159--166",
  year =         "1997",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Thu Dec 9 06:27:54 MST 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}

@InProceedings{Wang:1997:TPD,
  author =       "Paul S.
Wang", title = "Tools for parallel\slash distributed mathematical computation", crossref = "ACM:1997:PPS", pages = "188--195", year = "1997", bibdate = "Tue Sep 28 07:51:05 MDT 1999", bibsource = "Compendex database; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, affiliation = "Kent State Univ", affiliationaddress = "USA", classification = "722.3; 722.4; 723; 723.5; 921; 921.1", keywords = "Algebra; Common Lisp; Computational methods; Computer networks; Computer software; Data communication systems; Interfaces (computer); Lisp (programming language); Multi protocol (MP); Multiple instruction multiple data (MIMD) parallel machines; Network protocols; Parallel processing systems; Parallel virtual machines (PVM); Program compilers; Symbolic and algebraic computation (SAC); Virtual reality", } @Article{Winstanley:1997:PDP, author = "N. Winstanley and J. O'Donnell", title = "Parallel Distributed Programming with {Haskell+PVM}", journal = j-LECT-NOTES-COMP-SCI, volume = "1300", pages = "670--??", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Apr 28 08:51:33 MDT 1998", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Wismueller:1997:DMP, author = "R. Wismueller", title = "Debugging Message Passing Programs Using Invisible Message Tags", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "295--304", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Wolf:1997:CMP, author = "K. Wolf and E. Brakkee and D. P. 
Ho", title = "Communication in Multi-Physics Applications", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "167--176", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Yalamov:1997:BRT, author = "Plamen Y. Yalamov and Svetozar Margenov", title = "Book Reviews: Two books on {MPI}: {{\em Parallel Programming with MPI}}; {{\em MPI: The Complete Reference (2nd printing)}}", journal = j-IEEE-CONCURR, volume = "5", number = "4", pages = "81--81", month = oct # "\slash " # dec, year = "1997", CODEN = "IECMFX", DOI = "https://doi.org/10.1109/MCC.1997.580454", ISSN = "1092-3063 (print), 1558-0849 (electronic)", ISSN-L = "1092-3063", bibdate = "Mon Jun 7 07:52:29 MDT 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://dlib.computer.org/pd/books/pd1997/pdf/p4080.pdf", acknowledgement = ack-nhfb, fjournal = "IEEE Concurrency", } @Article{Zhang:1997:DED, author = "Xiaodong Zhang and Sandra G. 
Dykes and Hong Deng", title = "Distributed Edge Detection: Issues and Implementations", journal = j-IEEE-COMPUT-SCI-ENG, volume = "4", number = "1", pages = "72--82", month = jan # "\slash " # mar, year = "1997", CODEN = "ISCEE4", DOI = "https://doi.org/10.1109/99.590860", ISSN = "1070-9924 (print), 1558-190X (electronic)", ISSN-L = "1070-9924", bibdate = "Sat Jan 9 08:57:23 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://dlib.computer.org/cs/books/cs1997/pdf/c1072.pdf; http://www.computer.org/cse/cs1998/c1072abs.htm", abstract = "Experiments in parallelizing an edge detection algorithm on three representative message-passing architectures --- a low-cost, heterogeneous PVM network, an Intel {iPSC\slash 860} hypercube, and a {CM-5} massively parallel multicomputer --- provide insight into implementation and performance issues for image-processing applications.", acknowledgement = ack-nhfb, fjournal = "IEEE Computational Science \& Engineering", journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=99", } @Article{Zilli:1997:TBN, author = "G. Zilli and L. Bergamaschi", title = "Truncated Block {Newton} and Quasi-{Newton} Methods for Sparse Systems of Nonlinear Equations. 
Experiments on Parallel Platforms", journal = j-LECT-NOTES-COMP-SCI, volume = "1332", pages = "390--400", year = "1997", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Dec 9 06:27:54 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Book{Adamo:1998:MTO, author = "Jean-Marc Adamo", title = "Multi-threaded object-oriented {MPI}-based message passing interface: the {ARCH} library", volume = "SECS 446", publisher = pub-KLUWER, address = pub-KLUWER:adr, pages = "xiv + 185", year = "1998", ISBN = "0-7923-8165-3", ISBN-13 = "978-0-7923-8165-5", LCCN = "TK5102.5.A293 1998", bibdate = "Mon May 17 18:15:19 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", price = "US\$120.00", series = "The Kluwer international series in engineering and computer science", acknowledgement = ack-nhfb, keywords = "data transmission systems; object-oriented programming (computer science); threads (computer programs)", libnote = "Not yet in my library.", } @Article{Alexandrov:1998:CGP, author = "V. Alexandrov and F. Dehne and A. Rau-Chaplin and K. Taft", title = "Coarse Grained Parallel {Monte Carlo} Algorithms for Solving {SLAE} Using {PVM}", journal = j-LECT-NOTES-COMP-SCI, volume = "1497", pages = "323--??", year = "1998", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Jan 5 08:21:58 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Andersson:1998:PFT, author = "U. 
Andersson",
  title =        "Parallelization of a {$3$D FD-TD} Code for the {Maxwell} Equations Using {MPI}",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1541",
  pages =        "12--19",
  year =         "1998",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Wed Sep 15 10:01:31 MDT 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/lncs1998b.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
  keywords =     "applied parallel computing; computing science; PARA; parallel computing",
}

@TechReport{Andre:1998:BVN,
  author =       "Andr{\'e} Fachat and Karl Heinz Hoffmann",
  title =        "Blocking vs. non-blocking communication under {MPI} on a Master-Worker problem",
  type =         "{Preprint-Reihe des Chemnitzer SFB 393 Sonderforschungsbereich Numerische Simulation auf Massiv Parallelen Rechnern}",
  number =       "98,18",
  institution =  "Universit{\"a}t Chemnitz-Zwickau",
  address =      "Chemnitz, Germany",
  year =         "1998",
  bibdate =      "Wed Aug 27 07:09:52 2003",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}

@Article{Anonymous:1998:ANO,
  author =       "Anonymous",
  title =        "Announcements: New Official {Fortran} Technical Reports; Working Group 5 Documents; {OpenGL} {Fortran 95} Bindings; {MPI} Module Provides Enhanced {Fortran} Support; Variable Precision Arithmetic; {Fortran} Information Sites; New {Fortran} Compiler Versions from {Lahey} and {Fujitsu}; Downloadable Advanced {Fortran} Textbook; {Fortran} Engineering Textbook",
  journal =      j-FORTRAN-FORUM,
  volume =       "17",
  number =       "3",
  pages =        "1--2",
  month =        dec,
  year =         "1998",
  CODEN =        "????",
  ISSN =         "1061-7264 (print), 1931-1311 (electronic)",
  ISSN-L =       "1061-7264",
  bibdate =      "Thu Feb 07 13:34:54 2002",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Fortran Forum",
  issue =        "53",
}

@Article{Baker:1998:MNC,
  author =       "M.
Baker", title = "{MPI} on {NT}: The Current Status and Performance of the Available Environments", journal = j-LECT-NOTES-COMP-SCI, volume = "1497", pages = "63--??", year = "1998", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Jan 5 08:21:58 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Baker:1998:MNP, author = "M. Baker and G. Fox", title = "{MPI} on {NT}: a Preliminary Evaluation of the Available Environments", journal = j-LECT-NOTES-COMP-SCI, volume = "1388", pages = "549--??", year = "1998", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Oct 10 14:40:24 MDT 1998", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Berthou:1998:PHM, author = "J.-Y. Berthou and L. Plagne", title = "Parallel {HPF-MPI} Implementation of the {TBSCM} {Poisson} Solver", journal = j-LECT-NOTES-COMP-SCI, volume = "1401", pages = "252--??", year = "1998", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Oct 10 14:40:24 MDT 1998", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @TechReport{Bhanot:1998:DTM, author = "Gyan Bhanot", title = "A $2$-d transpose {MPI} code", type = "Research report", number = "RC 21217", institution = "T. J. 
Watson Research Center, IBM Corporation",
  address =      "Yorktown Heights, NY, USA",
  year =         "1998",
  bibdate =      "Wed Aug 27 07:16:38 2003",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}

@Article{Browne:1998:RPA,
  author =       "Shirley Browne and Jack Dongarra and Kevin London",
  title =        "Review of Performance Analysis Tools for {MPI} Parallel Programs",
  journal =      "NHSE Review",
  volume =       "3",
  year =         "1998",
  CODEN =        "????",
  ISSN =         "????",
  bibdate =      "Tue Feb 26 10:10:44 2002",
  bibsource =    "https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note =         "Accepted, to appear.",
  URL =          "http://www.cs.utk.edu/~browne/perftools-review/",
  acknowledgement = ack-nhfb,
  keywords =     "National HPCC Software Exchange (NHSE); Rice University",
  remark =       "This journal ceased publication in 1997.",
}

@Article{Bubak:1998:PCL,
  author =       "M. Bubak and P. Luszczek and A. Wierzbowska",
  title =        "Porting {CHAOS} Library to {MPI}",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1497",
  pages =        "131--??",
  year =         "1998",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Tue Jan 5 08:21:58 MST 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}

@Article{Carissimi:1998:AEM,
  author =       "A. Carissimi and M. Pasin",
  title =        "{Athapascan}: An Experience on Mixing {MPI} Communications and Threads",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1497",
  pages =        "137--??",
  year =         "1998",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Tue Jan 5 08:21:58 MST 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}

@Article{Ceron:1998:PID,
  author =       "C. Ceron and J. Dopazo and E. L. Zapata and J. M. Carazo and O.
Trelles", title = "Parallel implementation of {DNAml} program on message-passing architectures", journal = j-PARALLEL-COMPUTING, volume = "24", number = "5--6", pages = "701--716", day = "1", month = jun, year = "1998", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Sun Oct 25 09:30:12 MST 1998", bibsource = "Compendex database; http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_free/browse/browse.cgi?year=1998&volume=24&issue=5-6; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.elsevier.com/cas/tree/store/parco/sub/1998/24/5-6/1279.pdf", acknowledgement = ack-nhfb, affiliation = "Univ of Malaga", affiliationaddress = "Malaga, Spain", classification = "722; 722.4; 723; 723.2", fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", journalabr = "Parallel Comput", keywords = "Computer architecture; Computer software; Message passing computer architecture; Natural sciences computing; Parallel algorithms; Parallel processing systems; Parallel virtual machines (PVM)", } @Article{Chan:1998:PCT, author = "K. J. Chan and A. M. Gibbons and M. Pias and W. Rytter", title = "On the {PVM} Computations of Transitive Closure and Algebraic Path Problems", journal = j-LECT-NOTES-COMP-SCI, volume = "1497", pages = "338--??", year = "1998", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Jan 5 08:21:58 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Chapman:1998:OHI, author = "B. Chapman and P. 
Mehrotra", title = "{OpenMP} and {HPF}: Integrating Two Paradigms", journal = j-LECT-NOTES-COMP-SCI, volume = "1470", pages = "650--??", year = "1998", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Oct 10 14:40:24 MDT 1998", bibsource = "https://www.math.utah.edu/pub/tex/bib/fortran3.bib; https://www.math.utah.edu/pub/tex/bib/hpfortran.bib; https://www.math.utah.edu/pub/tex/bib/lncs1998b.bib; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Chetlur:1998:ALE, author = "M. Chetlur and G. D. Sharma and N. Abu-Ghazaleh and U. K. V. Rajasekaran", title = "An Active Layer Extension to {MPI}", journal = j-LECT-NOTES-COMP-SCI, volume = "1497", pages = "97--??", year = "1998", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Jan 5 08:21:58 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Clark:1998:FOP, author = "David Clark", title = "Focus: {OpenMP}: a parallel standard for the masses", journal = j-IEEE-CONCURR, volume = "6", number = "1", pages = "10--12", month = jan # "\slash " # mar, year = "1998", CODEN = "IECMFX", DOI = "https://doi.org/10.1109/4434.656771", ISSN = "1092-3063 (print), 1558-0849 (electronic)", ISSN-L = "1092-3063", bibdate = "Tue Jan 16 06:04:49 MST 2001", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeeconcurrency.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://dlib.computer.org/pd/books/pd1998/pdf/p1010.pdf", acknowledgement = ack-nhfb, fjournal = "IEEE Concurrency", } @Article{Cotronis:1998:DMP, author = "Y. 
Cotronis", title = "Developing Message-Passing Applications on {MPICH} under Ensemble", journal = j-LECT-NOTES-COMP-SCI, volume = "1497", pages = "145--??", year = "1998", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Jan 5 08:21:58 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Cunha:1998:MPP, author = "J. C. Cunha and V. Duarte", title = "Monitoring {PVM} Programs Using the {DAMS} Approach", journal = j-LECT-NOTES-COMP-SCI, volume = "1497", pages = "273--??", year = "1998", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Jan 5 08:21:58 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Dagum:1998:OIS, author = "Leonardo Dagum and Ramesh Menon", title = "{OpenMP}: An Industry-Standard {API} for Shared-Memory Programming", journal = j-IEEE-COMPUT-SCI-ENG, volume = "5", number = "1", pages = "46--55", month = jan # "\slash " # mar, year = "1998", CODEN = "ISCEE4", DOI = "https://doi.org/10.1109/99.660313", ISSN = "1070-9924 (print), 1558-190X (electronic)", ISSN-L = "1070-9924", bibdate = "Sat Jan 9 08:57:23 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeecomputscieng.bib; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://dlib.computer.org/cs/books/cs1998/pdf/c1046.pdf; http://www.computer.org/cse/cs1998/c1046abs.htm", acknowledgement = ack-nhfb, fjournal = "IEEE Computational Science \& Engineering", journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=99", } @Article{Dantas:1998:ESM, author = "M. A. R. Dantas and E. J. 
Zaluska",
  title = "Efficient scheduling of {MPI} applications on networks of workstations",
  journal = j-FUT-GEN-COMP-SYS,
  volume = "13",
  number = "6",
  pages = "489--499",
  day = "20",
  month = may,
  year = "1998",
  CODEN = "FGSEVI",
  ISSN = "0167-739X (print), 1872-7115 (electronic)",
  ISSN-L = "0167-739X",
  bibdate = "Wed Feb 27 12:41:17 MST 2002",
  bibsource = "http://www.elsevier.com/locate/issn/0167739X; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://www.elsevier.com/gej-ng/10/19/19/28/20/21/abstract.html",
  acknowledgement = ack-nhfb,
  fjournal = "Future Generation Computer Systems",
  journal-URL = "http://www.sciencedirect.com/science/journal/0167739X",
}

@Article{Delves:1998:HPF,
  author = "M. Delves and H. Zima",
  title = "{High Performance Fortran}: a Status Report or: Are We Ready to Give Up {MPI}?",
  journal = j-LECT-NOTES-COMP-SCI,
  volume = "1497",
  pages = "161--??",
  year = "1998",
  CODEN = "LNCSD9",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L = "0302-9743",
  bibdate = "Tue Jan 5 08:21:58 MST 1999",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Lecture Notes in Computer Science",
}

@Article{Dimov:1998:IMC,
  author = "I. Dimov and V. Alexandrov and A. Karaivanova",
  title = "Implementation of {Monte Carlo} Algorithms for Eigenvalue Problem Using {MPI}",
  journal = j-LECT-NOTES-COMP-SCI,
  volume = "1497",
  pages = "346--??",
  year = "1998",
  CODEN = "LNCSD9",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L = "0302-9743",
  bibdate = "Tue Jan 5 08:21:58 MST 1999",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Lecture Notes in Computer Science",
}

@Article{Espinosa:1998:ADP,
  author = "A. Espinosa and T. Margalef and E.
Luque",
  title = "Automatic Detection of {PVM} Program Performance Problems",
  journal = j-LECT-NOTES-COMP-SCI,
  volume = "1497",
  pages = "19--??",
  year = "1998",
  CODEN = "LNCSD9",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L = "0302-9743",
  bibdate = "Tue Jan 5 08:21:58 MST 1999",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Lecture Notes in Computer Science",
}

@Article{Fagg:1998:MMH,
  author = "G. E. Fagg and K. S. London and J. J. Dongarra",
  title = "{MPIConnect}: Managing Heterogeneous {MPI} Applications Interoperation and Process Control",
  journal = j-LECT-NOTES-COMP-SCI,
  volume = "1497",
  pages = "93--??",
  year = "1998",
  CODEN = "LNCSD9",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L = "0302-9743",
  bibdate = "Tue Jan 5 08:21:58 MST 1999",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Lecture Notes in Computer Science",
}

@Book{Fang:1998:DDL,
  author = "Niandong Fang",
  title = "Distributed data library and tools for an {MPI} programming environment",
  volume = "1",
  publisher = "Shaker",
  address = "Aachen, Germany",
  pages = "xx + 195",
  year = "1998",
  ISBN = "3-8265-4101-4",
  ISBN-13 = "978-3-8265-4101-8",
  LCCN = "????",
  bibdate = "Wed Aug 27 06:49:31 2003",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "Also published as dissertation of the University of Basel.",
  series = "Research reports in computer science",
  acknowledgement = ack-nhfb,
}

@InProceedings{Ferrari:1998:JNPa,
  author = "Adam J.
Ferrari",
  title = "{JPVM}: Network Parallel Computing in {Java}",
  crossref = "ACM:1998:AWJ",
  pages = "??--??",
  year = "1998",
  bibdate = "Thu Apr 27 10:43:08 2000",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://www.cs.ucsb.edu/conferences/java98/papers/jpvm.pdf; http://www.cs.ucsb.edu/conferences/java98/papers/jpvm.ps",
  acknowledgement = ack-nhfb,
}

@Article{Ferrari:1998:JNPb,
  author = "Adam Ferrari",
  title = "{JPVM}: network parallel computing in {Java}",
  journal = j-CPE,
  volume = "10",
  number = "11--13",
  pages = "985--992",
  month = sep,
  year = "1998",
  CODEN = "CPEXEI",
  ISSN = "1040-3108",
  ISSN-L = "1040-3108",
  bibdate = "Tue Sep 7 06:06:44 MDT 1999",
  bibsource = "http://www.interscience.wiley.com/jpages/1040-3108/; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www3.interscience.wiley.com/journalfinder.html",
  note = "Special Issue: Java for High-performance Network Computing.",
  URL = "http://www3.interscience.wiley.com/cgi-bin/abstract?ID=10050413; http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=10050413&PLACEBO=IE.pdf",
  acknowledgement = ack-nhfb,
  fjournal = "Concurrency, practice and experience",
}

@Article{Ferrari:1998:MDC,
  author = "Adam Ferrari and V. S. Sunderam",
  title = "Multiparadigm distributed computing with {TPVM}",
  journal = j-CPE,
  volume = "10",
  number = "3",
  pages = "199--228",
  month = mar,
  year = "1998",
  CODEN = "CPEXEI",
  ISSN = "1040-3108",
  ISSN-L = "1040-3108",
  bibdate = "Tue Sep 7 06:06:39 MDT 1999",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www3.interscience.wiley.com/journalfinder.html",
  URL = "http://www3.interscience.wiley.com/cgi-bin/abstract?ID=5374; http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=5374&PLACEBO=IE.pdf",
  acknowledgement = ack-nhfb,
  fjournal = "Concurrency, practice and experience",
}

@Article{Folino:1998:EMC,
  author = "G. Folino and G. Spezzano and D.
Talia",
  title = "Evaluating and Modeling Communication Overhead of {MPI} Primitives on the {Meiko CS-2}",
  journal = j-LECT-NOTES-COMP-SCI,
  volume = "1497",
  pages = "27--??",
  year = "1998",
  CODEN = "LNCSD9",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L = "0302-9743",
  bibdate = "Tue Jan 5 08:21:58 MST 1999",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Lecture Notes in Computer Science",
}

@Article{Folino:1998:PEM,
  author = "G. Folino and G. Spezzano and D. Talia",
  title = "Performance Evaluation and Modelling of {MPI} Communications on the {Meiko CS-2}",
  journal = j-LECT-NOTES-COMP-SCI,
  volume = "1401",
  pages = "932--??",
  year = "1998",
  CODEN = "LNCSD9",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L = "0302-9743",
  bibdate = "Sat Oct 10 14:40:24 MDT 1998",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Lecture Notes in Computer Science",
}

@InProceedings{Foster:1998:GEM,
  author = "Ian Foster",
  title = "A Grid-Enabled {MPI}: Message Passing in Heterogeneous Distributed Computing Systems",
  crossref = "ACM:1998:SHP",
  pages = "??--??",
  year = "1998",
  bibdate = "Wed Oct 07 08:50:26 1998",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.supercomp.org/sc98/papers/",
  acknowledgement = ack-nhfb,
}

@Article{Foster:1998:WAI,
  author = "Ian Foster and Jonathan Geisler and William Gropp and Nicholas Karonis and Ewing Lusk and George Thiruvathukal and Steven Tuecke",
  title = "Wide-area implementation of the {Message Passing Interface}",
  journal = j-PARALLEL-COMPUTING,
  volume = "24",
  number = "12--13",
  pages = "1735--1749",
  day = "1",
  month = nov,
  year = "1998",
  CODEN = "PACOEJ",
  ISSN = "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L = "0167-8191",
  bibdate = "Fri Aug 6 10:15:40 MDT 1999",
  bibsource = "http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_free/browse/browse.cgi?year=1998&volume=24&issue=12-13;
https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://www.elsevier.com/cas/tree/store/parco/sub/1998/24/12-13/1352.pdf",
  acknowledgement = ack-nhfb,
  fjournal = "Parallel Computing",
  journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
}

@Article{Fuerle:1998:IPC,
  author = "T. Fuerle and E. Schikuta and C. Loeffelhardt and K. Stockinger",
  title = "On the Implementation of a Portable, Client-Server Based {MPI-IO} Interface",
  journal = j-LECT-NOTES-COMP-SCI,
  volume = "1497",
  pages = "172--??",
  year = "1998",
  CODEN = "LNCSD9",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L = "0302-9743",
  bibdate = "Tue Jan 5 08:21:58 MST 1999",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Lecture Notes in Computer Science",
}

@Article{Garcia-Consuegra:1998:DGR,
  author = "J. D. Garcia-Consuegra and J. A. Gallud and G. Sebastian",
  title = "Distributed Georeferring of Remotely Sensed {Landsat-TM} Imagery Using {MPI}",
  journal = j-LECT-NOTES-COMP-SCI,
  volume = "1541",
  pages = "161--166",
  year = "1998",
  CODEN = "LNCSD9",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L = "0302-9743",
  bibdate = "Wed Sep 15 10:01:31 MDT 1999",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs1998b.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Lecture Notes in Computer Science",
  keywords = "applied parallel computing; computing science; PARA; parallel computing",
}

@Article{Geist:1998:HNG,
  author = "G. A.
Geist",
  title = "{Harness}: The Next Generation Beyond {PVM}",
  journal = j-LECT-NOTES-COMP-SCI,
  volume = "1497",
  pages = "74--??",
  year = "1998",
  CODEN = "LNCSD9",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L = "0302-9743",
  bibdate = "Tue Jan 5 08:21:58 MST 1999",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Lecture Notes in Computer Science",
}

@Article{Gorlatch:1998:GMI,
  author = "Sergei Gorlatch and Holger Bischof",
  title = "A Generic {MPI} Implementation for a Data-Parallel Skeleton: Formal Derivation and Application to {FFT}",
  journal = j-PARALLEL-PROCESS-LETT,
  volume = "8",
  number = "4",
  pages = "447--??",
  month = dec,
  year = "1998",
  CODEN = "PPLTEE",
  ISSN = "0129-6264 (print), 1793-642X (electronic)",
  ISSN-L = "0129-6264",
  bibdate = "Thu Jan 6 12:02:34 MST 2005",
  bibsource = "http://ejournals.wspc.com.sg/ppl/; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Parallel Processing Letters",
  journal-URL = "http://www.worldscientific.com/loi/ppl",
}

@Article{Goujon:1998:AAT,
  author = "D. S. Goujon and M. Michel and J. Peeters and J. E. Devaney",
  title = "{AutoMap} and {AutoLink}: Tools for Communicating Complex and Dynamic Data-Structures Using {MPI}",
  journal = j-LECT-NOTES-COMP-SCI,
  volume = "1362",
  pages = "98--??",
  year = "1998",
  CODEN = "LNCSD9",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L = "0302-9743",
  bibdate = "Tue Apr 28 08:51:33 MDT 1998",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Lecture Notes in Computer Science",
}

@TechReport{Grabowsky:1998:NMP,
  author = "Lothar Grabowsky and Thomas Ermer and J{\"o}rg Werner",
  title = "{Nutzung von MPI f{\"u}r parallele FEM-Systeme}.
({German}) [{Use} of {MPI} for parallel {FEM} systems]",
  type = "{Preprint-Reihe des Chemnitzer SFB 393 Sonderforschungsbereich Numerische Simulation auf Massiv Parallelen Rechnern}",
  number = "97,08; RA-TR 02-97",
  institution = "Universit{\"a}t Chemnitz-Zwickau",
  address = "Chemnitz, Germany",
  year = "1998",
  bibdate = "Wed Aug 27 07:11:28 2003",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  language = "German",
}

@Book{Gropp:1998:MCR,
  author = "William Gropp and Steven Huss-Lederman and Andrew Lumsdaine and Ewing Lusk and Bill Nitzberg and William Saphir and Marc Snir",
  title = "{MPI}: The Complete Reference. Volume 2, The {MPI-2} Extensions",
  publisher = pub-MIT,
  address = pub-MIT:adr,
  edition = "Second",
  pages = "350",
  year = "1998",
  ISBN = "0-262-57123-4 (vol. 2), 0-262-69216-3 (set)",
  ISBN-13 = "978-0-262-57123-4 (vol. 2), 978-0-262-69216-8 (set)",
  LCCN = "QA76.642 .M65 1998",
  bibdate = "Thu Oct 29 07:27:43 1998",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "See also volume 1 \cite{Snir:1998:MCR}.",
  price = "US\$30 (paperback)",
  series = "Scientific and Engineering Computation",
  URL = "http://mitpress.mit.edu/book-home.tcl?isbn=0262571234",
  acknowledgement = ack-nhfb,
}

@Article{Haimes:1998:UPM,
  author = "R. Haimes and K. E.
Jordan",
  title = "Using {PVM} and {MPI} for Co-processed, Distributed and Parallel Scientific Visualization",
  journal = j-LECT-NOTES-COMP-SCI,
  volume = "1388",
  pages = "1098--??",
  year = "1998",
  CODEN = "LNCSD9",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L = "0302-9743",
  bibdate = "Sat Oct 10 14:40:24 MDT 1998",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Lecture Notes in Computer Science",
}

@Article{Hansen:1998:EMP,
  author = "Per Brinch Hansen",
  title = "An Evaluation of the {Message-Passing Interface}",
  journal = j-SIGPLAN,
  volume = "33",
  number = "3",
  pages = "65--72",
  month = mar,
  year = "1998",
  CODEN = "SINODQ",
  ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)",
  ISSN-L = "0362-1340",
  bibdate = "Thu Apr 30 08:30:23 MDT 1998",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "The author criticizes MPI, and remarks ``MPI \ldots{} lack[s] the elegance and security that can only be checked by a parallel programming language.''",
  acknowledgement = ack-nhfb,
  fjournal = "ACM SIG{\-}PLAN Notices",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
}

@Article{Hatazaki:1998:RRS,
  author = "T. Hatazaki",
  title = "Rank Reordering Strategy for {MPI} Topology Creation Functions",
  journal = j-LECT-NOTES-COMP-SCI,
  volume = "1497",
  pages = "188--??",
  year = "1998",
  CODEN = "LNCSD9",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L = "0302-9743",
  bibdate = "Tue Jan 5 08:21:58 MST 1999",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Lecture Notes in Computer Science",
}

@Article{Herland:1998:CML,
  author = "B. G. Herland and M. Eberl and H.
Hellwagner",
  title = "A Common Messaging Layer for {MPI} and {PVM} over {SCI}",
  journal = j-LECT-NOTES-COMP-SCI,
  volume = "1401",
  pages = "576--??",
  year = "1998",
  CODEN = "LNCSD9",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L = "0302-9743",
  bibdate = "Sat Oct 10 14:40:24 MDT 1998",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Lecture Notes in Computer Science",
}

@InProceedings{Husbands:1998:MSD,
  author = "Parry J. Husbands",
  title = "{MPI-StarT}: Delivering Network Performance to Numerical Applications",
  crossref = "ACM:1998:SHP",
  pages = "??--??",
  year = "1998",
  bibdate = "Wed Oct 07 08:50:26 1998",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.supercomp.org/sc98/papers/",
  acknowledgement = ack-nhfb,
}

@Article{Karlsson:1998:CCC,
  author = "S. Karlsson and M. Brorsson",
  title = "A Comparative Characterization of Communication Patterns in Applications Using {MPI} and Shared Memory on an {IBM SP2}",
  journal = j-LECT-NOTES-COMP-SCI,
  volume = "1362",
  pages = "189--??",
  year = "1998",
  CODEN = "LNCSD9",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L = "0302-9743",
  bibdate = "Tue Apr 28 08:51:33 MDT 1998",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Lecture Notes in Computer Science",
}

@Article{Kemelmakher:1998:SAR,
  author = "M. Kemelmakher and O. Kremien",
  title = "Scalable and Adaptive Resource Sharing in {PVM}",
  journal = j-LECT-NOTES-COMP-SCI,
  volume = "1497",
  pages = "196--??",
  year = "1998",
  CODEN = "LNCSD9",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L = "0302-9743",
  bibdate = "Tue Jan 5 08:21:58 MST 1999",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Lecture Notes in Computer Science",
}

@Article{Kranzlmueller:1998:DPP,
  author = "D. Kranzlmueller and J.
Volkert",
  title = "Debugging Point-to-Point Communication in {MPI} and {PVM}",
  journal = j-LECT-NOTES-COMP-SCI,
  volume = "1497",
  pages = "265--??",
  year = "1998",
  CODEN = "LNCSD9",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L = "0302-9743",
  bibdate = "Tue Jan 5 08:21:58 MST 1999",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Lecture Notes in Computer Science",
}

@Article{Kuhn:1998:FFW,
  author = "Bob Kuhn",
  title = "{Fortran Futures}: Workshop: {OpenMP} for Parallel {Fortran} Applications",
  journal = j-FORTRAN-FORUM,
  volume = "17",
  number = "3",
  pages = "22--22",
  month = dec,
  year = "1998",
  CODEN = "????",
  ISSN = "1061-7264 (print), 1931-1311 (electronic)",
  ISSN-L = "1061-7264",
  bibdate = "Thu Feb 07 06:54:12 2002",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/fortran-forum.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "ACM Fortran Forum",
  issue = "53",
}

@Article{Lavi:1998:IPD,
  author = "R. Lavi and A.
Barak",
  title = "Improving the {PVM} Daemon Network Performance by Direct Network Access",
  journal = j-LECT-NOTES-COMP-SCI,
  volume = "1497",
  pages = "44--??",
  year = "1998",
  CODEN = "LNCSD9",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L = "0302-9743",
  bibdate = "Tue Jan 5 08:21:58 MST 1999",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Lecture Notes in Computer Science",
}

@Article{Leung:1998:PAN,
  author = "Ka-Cheong Leung and Mounir Hamdi",
  title = "Performance assessment of network protocols and parallel programming tools for distributed computing systems",
  journal = j-INT-J-COMPUT-SYST-SCI-ENG,
  volume = "13",
  number = "1",
  pages = "67--80",
  month = jan,
  year = "1998",
  CODEN = "CSSEEI",
  ISSN = "0267-6192",
  ISSN-L = "0267-6192",
  bibdate = "Thu Feb 4 13:21:32 MST 1999",
  bibsource = "Compendex database; https://www.math.utah.edu/pub/tex/bib/pvm.bib; OCLC Contents1st database",
  acknowledgement = ack-nhfb,
  affiliation = "Hong Kong Univ of Science and Technology",
  affiliationaddress = "Kowloon, Hong Kong",
  classification = "722.2; 722.3; 722.4; 723.1; 723.2; 723.5",
  fjournal = "International Journal of Computer Systems Science and Engineering",
  journalabr = "Comput Syst Sci Eng",
  keywords = "Communication overhead; Computer aided software engineering; Computer programming; Computer workstations; Data communication systems; Distributed computer systems; Ethernet; Fiber distributed data interface; Interfaces (computer); Local area networks; Mathematical models; Network protocols; Parallel processing systems; Software Package Express; Software Package PVM",
}

@Article{Lockey:1998:CRM,
  author = "P. Lockey and R. Proctor and I. D.
James",
  title = "Characterization of {I/O} Requirements in a Massively Parallel Shelf Sea Model",
  journal = j-IJHPCA,
  volume = "12",
  number = "3",
  pages = "320--332",
  month = "Fall",
  year = "1998",
  CODEN = "IHPCFL",
  DOI = "https://doi.org/10.1177/109434209801200302",
  ISSN = "1094-3420 (print), 1741-2846 (electronic)",
  ISSN-L = "1094-3420",
  bibdate = "Tue Nov 6 09:20:17 MST 2018",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/ijsa.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://journals.sagepub.com/doi/pdf/10.1177/109434209801200302",
  acknowledgement = ack-nhfb,
  fjournal = "International Journal of High Performance Computing Applications",
  journal-URL = "https://journals.sagepub.com/home/hpc",
  xxmonth = sep,
}

@InProceedings{Lu:1998:ONW,
  author = "Honghui Lu and Y. Charlie Hu and Willy Zwaenepoel",
  title = "{OpenMP} on Networks of Workstations",
  crossref = "ACM:1998:SHP",
  pages = "??--??",
  year = "1998",
  bibdate = "Wed Mar 06 06:32:51 2002",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/supercomputing98.bib; http://www.supercomp.org/sc98/papers/",
  URL = "http://www.supercomp.org/sc98/TechPapers/sc98_FullAbstracts/Lu1105/index.htm",
  acknowledgement = ack-nhfb,
}

@Article{Mackay:1998:SPF,
  author = "David Mackay and G.
Mahinthakumar and Ed D'Azevedo",
  title = "A Study of {I/O} in a Parallel Finite Element Groundwater Transport Code",
  journal = j-IJHPCA,
  volume = "12",
  number = "3",
  pages = "307--319",
  month = "Fall",
  year = "1998",
  CODEN = "IHPCFL",
  DOI = "https://doi.org/10.1177/109434209801200301",
  ISSN = "1094-3420 (print), 1741-2846 (electronic)",
  ISSN-L = "1094-3420",
  bibdate = "Tue Nov 6 09:20:17 MST 2018",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/ijsa.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://journals.sagepub.com/doi/pdf/10.1177/109434209801200301",
  acknowledgement = ack-nhfb,
  fjournal = "International Journal of High Performance Computing Applications",
  journal-URL = "https://journals.sagepub.com/home/hpc",
  xxmonth = sep,
}

@Article{Mamontov:1998:AES,
  author = "Y. V. Mamontov and M. Willander",
  title = "An Algorithm to Evaluate Spectral Densities of High-Dimensional Stationary Diffusion Stochastic Processes with Non-linear Coefficients: The General Scheme and Issues on Implementation with {PVM}",
  journal = j-LECT-NOTES-COMP-SCI,
  volume = "1541",
  pages = "315--321",
  year = "1998",
  CODEN = "LNCSD9",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L = "0302-9743",
  bibdate = "Wed Sep 15 10:01:31 MDT 1999",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs1998b.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Lecture Notes in Computer Science",
  keywords = "applied parallel computing; computing science; PARA; parallel computing",
}

@Article{Mans:1998:PDP,
  author = "Bernard Mans",
  title = "Portable distributed priority queues with {MPI}",
  journal = j-CPE,
  volume = "10",
  number = "3",
  pages = "175--198",
  month = mar,
  year = "1998",
  CODEN = "CPEXEI",
  ISSN = "1040-3108",
  ISSN-L = "1040-3108",
  bibdate = "Tue Sep 7 06:06:39 MDT 1999",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www3.interscience.wiley.com/journalfinder.html",
  URL = "http://www3.interscience.wiley.com/cgi-bin/abstract?ID=5373; http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=5373&PLACEBO=IE.pdf",
  acknowledgement = ack-nhfb,
  fjournal = "Concurrency, practice and experience",
}

@Article{Marinho:1998:WMP,
  author = "J. Marinho and J. G. Silva",
  title = "{WMPI} --- Message Passing Interface for {Win32} Clusters",
  journal = j-LECT-NOTES-COMP-SCI,
  volume = "1497",
  pages = "113--??",
  year = "1998",
  CODEN = "LNCSD9",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L = "0302-9743",
  bibdate = "Tue Jan 5 08:21:58 MST 1999",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Lecture Notes in Computer Science",
}

@Article{Martins:1998:JIW,
  author = "P. Martins and L. M. Silva and J. Silva",
  title = "A {Java} Interface for {WMPI}",
  journal = j-LECT-NOTES-COMP-SCI,
  volume = "1497",
  pages = "121--??",
  year = "1998",
  CODEN = "LNCSD9",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L = "0302-9743",
  bibdate = "Tue Jan 5 08:21:58 MST 1999",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Lecture Notes in Computer Science",
}

@Article{Medeiros:1998:IPM,
  author = "P. D. Medeiros and J. C. Cunha",
  title = "Interconnecting {PVM} and {MPI} Applications",
  journal = j-LECT-NOTES-COMP-SCI,
  volume = "1497",
  pages = "105--??",
  year = "1998",
  CODEN = "LNCSD9",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L = "0302-9743",
  bibdate = "Tue Jan 5 08:21:58 MST 1999",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Lecture Notes in Computer Science",
}

@Article{Megson:1998:CRH,
  author = "G. M. Megson and R. S. Fish and D. N. J.
Clarke",
  title = "Creation of Reconfigurable Hardware Objects in {PVM} Environments",
  journal = j-LECT-NOTES-COMP-SCI,
  volume = "1497",
  pages = "215--??",
  year = "1998",
  CODEN = "LNCSD9",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L = "0302-9743",
  bibdate = "Tue Jan 5 08:21:58 MST 1999",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Lecture Notes in Computer Science",
}

@Article{MF:1998:SIM,
  author = "{MPI Forum}",
  title = "Special Issue: {MPI2}: a Message-Passing Interface Standard",
  journal = j-IJHPCA,
  volume = "12",
  number = "1--2",
  pages = "1--299",
  month = "Spring--Summer",
  year = "1998",
  CODEN = "IHPCFL",
  ISSN = "1094-3420 (print), 1741-2846 (electronic)",
  ISSN-L = "1094-3420",
  bibdate = "Wed Apr 8 15:55:29 1998",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/ijsa.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "International Journal of High Performance Computing Applications",
  journal-URL = "https://journals.sagepub.com/home/hpc",
  remark = "Same publication as entry MPIForum:1998:SIM in this file, which cites the issue under the journal title ``International Journal of Supercomputer Applications and High Performance Computing''.",
}

@Article{Morimoto:1998:IMM,
  author = "K. Morimoto and T. Matsumoto and K.
Hiraki",
  title = "Implementing {MPI} with the Memory-Based Communication Facilities on the {SSS-CORE} Operating System",
  journal = j-LECT-NOTES-COMP-SCI,
  volume = "1497",
  pages = "223--??",
  year = "1998",
  CODEN = "LNCSD9",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L = "0302-9743",
  bibdate = "Tue Jan 5 08:21:58 MST 1999",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Lecture Notes in Computer Science",
}

@Article{MPIForum:1998:SIM,
  author = "{MPI Forum}",
  title = "Special Issue: {MPI2}: a Message-Passing Interface Standard",
  journal = j-IJSAHPC,
  volume = "12",
  number = "1--2",
  pages = "1--299",
  month = "Spring--Summer",
  year = "1998",
  CODEN = "IJSCFG",
  ISSN = "1078-3482",
  ISSN-L = "1078-3482",
  bibdate = "Wed Apr 8 15:55:29 1998",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "International Journal of Supercomputer Applications and High Performance Computing",
  remark = "Same publication as entry MF:1998:SIM in this file, which cites the issue under the journal title ``International Journal of High Performance Computing Applications''.",
}

@Article{Neophytou:1998:NDJ,
  author = "N. Neophytou and P. Evripidou",
  title = "{Net-dbx}: a {Java} Powered Tool for Interactive Debugging of {MPI} Programs Across the {Internet}",
  journal = j-LECT-NOTES-COMP-SCI,
  volume = "1470",
  pages = "181--??",
  year = "1998",
  CODEN = "LNCSD9",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L = "0302-9743",
  bibdate = "Sat Oct 10 14:40:24 MDT 1998",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Lecture Notes in Computer Science",
}

@Article{Nieplocha:1998:CHP,
  author = "Jarek Nieplocha and Ian Foster and Rick A.
Kendall",
  title = "{ChemIO}: High Performance Parallel {I/O} for Computational Chemistry Applications",
  journal = j-IJHPCA,
  volume = "12",
  number = "3",
  pages = "345--363",
  month = "Fall",
  year = "1998",
  CODEN = "IHPCFL",
  DOI = "https://doi.org/10.1177/109434209801200304",
  ISSN = "1094-3420 (print), 1741-2846 (electronic)",
  ISSN-L = "1094-3420",
  bibdate = "Tue Nov 6 09:20:17 MST 2018",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/ijsa.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://journals.sagepub.com/doi/pdf/10.1177/109434209801200304",
  acknowledgement = ack-nhfb,
  fjournal = "International Journal of High Performance Computing Applications",
  journal-URL = "https://journals.sagepub.com/home/hpc",
  xxmonth = sep,
}

@Article{Nitsche:1998:FMP,
  author = "T. Nitsche and W. Webers",
  title = "Functional Message Passing with {OPAL-MPI}",
  journal = j-LECT-NOTES-COMP-SCI,
  volume = "1497",
  pages = "281--??",
  year = "1998",
  CODEN = "LNCSD9",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L = "0302-9743",
  bibdate = "Tue Jan 5 08:21:58 MST 1999",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Lecture Notes in Computer Science",
}

@Article{Oldfield:1998:EPS,
  author = "Ron A. Oldfield and David E. Womble and Curtis C.
Ober",
  title = "Efficient Parallel {I/O} in Seismic Processing",
  journal = j-IJHPCA,
  volume = "12",
  number = "3",
  pages = "333--344",
  month = "Fall",
  year = "1998",
  CODEN = "IHPCFL",
  DOI = "https://doi.org/10.1177/109434209801200303",
  ISSN = "1094-3420 (print), 1741-2846 (electronic)",
  ISSN-L = "1094-3420",
  bibdate = "Tue Nov 6 09:20:17 MST 2018",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/ijsa.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://journals.sagepub.com/doi/pdf/10.1177/109434209801200303",
  acknowledgement = ack-nhfb,
  fjournal = "International Journal of High Performance Computing Applications",
  journal-URL = "https://journals.sagepub.com/home/hpc",
  xxmonth = sep,
}

@Article{Orlando:1998:MBR,
  author = "S. Orlando and R. Perego",
  title = "An {MPI}-based Run-Time Support to Coordinate {HPF} Tasks",
  journal = j-LECT-NOTES-COMP-SCI,
  volume = "1497",
  pages = "289--??",
  year = "1998",
  CODEN = "LNCSD9",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L = "0302-9743",
  bibdate = "Tue Jan 5 08:21:58 MST 1999",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Lecture Notes in Computer Science",
}

@Article{Papadopoulos:1998:DVS,
  author = "P. M. Papadopoulos and J. A.
Kohl",
  title = "Dynamic Visualization and Steering Using {PVM} and {MPI}",
  journal = j-LECT-NOTES-COMP-SCI,
  volume = "1497",
  pages = "297--??",
  year = "1998",
  CODEN = "LNCSD9",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L = "0302-9743",
  bibdate = "Tue Jan 5 08:21:58 MST 1999",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Lecture Notes in Computer Science",
}

@Article{Poggi:1998:UPD,
  author = "Agostino Poggi and Giulio Destri",
  title = "Using {PVM} to Develop a Distributed Object-Oriented Language for Heterogeneous Processing",
  journal = j-J-SYST-SOFTW,
  volume = "40",
  number = "2",
  pages = "139--150",
  month = feb,
  year = "1998",
  CODEN = "JSSODM",
  ISSN = "0164-1212 (print), 1873-1228 (electronic)",
  ISSN-L = "0164-1212",
  bibdate = "Thu Sep 9 07:30:16 MDT 2010",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sciencedirect.com/science/journal/01641212",
  acknowledgement = ack-nhfb,
  fjournal = "The Journal of systems and software",
  journal-URL = "http://www.sciencedirect.com/science/journal/01641212",
}

@Article{Rabenseifner:1998:MGI,
  author = "R. Rabenseifner",
  title = "{MPI-GLUE}: Interoperable High-Performance {MPI} Combining Different Vendor's {MPI} Worlds",
  journal = j-LECT-NOTES-COMP-SCI,
  volume = "1470",
  pages = "563--??",
  year = "1998",
  CODEN = "LNCSD9",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L = "0302-9743",
  bibdate = "Sat Oct 10 14:40:24 MDT 1998",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Lecture Notes in Computer Science",
}

@Article{Reussner:1998:SDA,
  author = "R. Reussner and P. Sanders and L. Prechelt and M.
Mueller",
  title = "{SKaMPI}: a Detailed, Accurate {MPI} Benchmark",
  journal = j-LECT-NOTES-COMP-SCI,
  volume = "1497",
  pages = "52--??",
  year = "1998",
  CODEN = "LNCSD9",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L = "0302-9743",
  bibdate = "Tue Jan 5 08:21:58 MST 1999",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Lecture Notes in Computer Science",
}

@Article{Scott:1998:PWN,
  author = "S. L. Scott and M. Fischer and A. Geist",
  title = "{PVM} on {Windows} and {NT} Clusters",
  journal = j-LECT-NOTES-COMP-SCI,
  volume = "1497",
  pages = "231--??",
  year = "1998",
  CODEN = "LNCSD9",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L = "0302-9743",
  bibdate = "Tue Jan 5 08:21:58 MST 1999",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Lecture Notes in Computer Science",
}

@Article{Sevenich:1998:PPU,
  author = "Richard Sevenich",
  title = "Parallel Processing Using {PVM}",
  journal = j-LINUX-J,
  volume = "45",
  pages = "??--??",
  month = jan,
  year = "1998",
  CODEN = "LIJOFX",
  ISSN = "1075-3583 (print), 1938-3827 (electronic)",
  ISSN-L = "1075-3583",
  bibdate = "Fri Oct 9 08:35:26 MDT 1998",
  bibsource = "http://noframes.linuxjournal.com/lj-issues/issue45/index.html; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract = "Turn your networked computers into a virtual machine.",
  acknowledgement = ack-nhfb,
  fjournal = "Linux journal",
  journal-URL = "http://portal.acm.org/citation.cfm?id=J508",
}

@Article{Simitci:1998:CLP,
  author = "Huseyin Simitci and Daniel A.
Reed",
  title = "A Comparison of Logical and Physical Parallel {I/O} Patterns",
  journal = j-IJHPCA,
  volume = "12",
  number = "3",
  pages = "364--380",
  month = "Fall",
  year = "1998",
  CODEN = "IHPCFL",
  DOI = "https://doi.org/10.1177/109434209801200305",
  ISSN = "1094-3420 (print), 1741-2846 (electronic)",
  ISSN-L = "1094-3420",
  bibdate = "Tue Nov 6 09:20:17 MST 2018",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/ijsa.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://journals.sagepub.com/doi/pdf/10.1177/109434209801200305",
  acknowledgement = ack-nhfb,
  fjournal = "International Journal of High Performance Computing Applications",
  journal-URL = "https://journals.sagepub.com/home/hpc",
  xxmonth = sep,
}

@Book{Snir:1998:MCR,
  author = "Marc Snir and Steve W. Otto and Steven Huss-Lederman and David W. Walker and Jack Dongarra",
  title = "{MPI}: The Complete Reference. Volume 1, The {MPI-1} Core",
  publisher = pub-MIT,
  address = pub-MIT:adr,
  edition = "Second",
  pages = "450",
  month = sep,
  year = "1998",
  ISBN = "0-262-69215-5 (vol. 1), 0-262-69216-3 (set)",
  ISBN-13 = "978-0-262-69215-1 (vol. 1), 978-0-262-69216-8 (set)",
  LCCN = "QA76.642 .M65 1998",
  bibdate = "Thu Oct 29 07:27:43 1998",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "See also volume 2 \cite{Gropp:1998:MCR}.",
  price = "US\$35 (paperback)",
  series = "Scientific and Engineering Computation",
  URL = "http://mitpress.mit.edu/book-home.tcl?isbn=0262692155",
  acknowledgement = ack-nhfb,
}

@MastersThesis{Stockinger:1998:VPC,
  author = "Kurt Stockinger",
  title = "{ViMPIOS} --- a portable, client-server based implementation of {MPI-IO} on {ViPIOS}",
  type = "{Diplom-Arbeit}",
  school = "Universit{\"a}t Wien",
  address = "Vienna, Austria",
  pages = "155",
  year = "1998",
  bibdate = "Wed Aug 27 07:21:00 2003",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}

@InProceedings{Thakur:1998:CUM,
  author = "Rajeev S.
Thakur and William Gropp and Ewing Lusk",
  title =        "A Case for Using {MPI}'s Derived Datatypes to Improve
                 {I/O} Performance",
  crossref =     "ACM:1998:SHP",
  pages =        "??--??",
  year =         "1998",
  bibdate =      "Wed Oct 07 08:50:26 1998",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.supercomp.org/sc98/papers/",
  acknowledgement = ack-nhfb,
}

%%% Thakur:1998:CUM: the author list names all three authors of the
%%% SC'98 paper (Thakur, Gropp, Lusk); the citation label keeps its
%%% first-author form, so existing \cite commands are unaffected.

@Article{Topol:1998:PTV,
  author =       "Brad Topol and John T. Stasko and Vaidy Sunderam",
  title =        "{PVaniM}: a tool for visualization in network computing
                 environments",
  journal =      j-CPE,
  volume =       "10",
  number =       "14",
  pages =        "1197--1222",
  day =          "10",
  month =        dec,
  year =         "1998",
  CODEN =        "CPEXEI",
  ISSN =         "1040-3108",
  ISSN-L =       "1040-3108",
  bibdate =      "Tue Sep 7 06:06:45 MDT 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www3.interscience.wiley.com/journalfinder.html",
  URL =          "http://www3.interscience.wiley.com/cgi-bin/abstract?ID=40005932;
                 http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=40005932&PLACEBO=IE.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency, practice and experience",
}

@Article{Tourino:1998:PBL,
  author =       "J. Touri{\~n}o and R. Doallo",
  title =        "A {PVM}-Based Library for Sparse Matrix Factorizations",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1497",
  pages =        "304--??",
  year =         "1998",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Tue Jan 5 08:21:58 MST 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}

@Article{Traeff:1998:PRL,
  author =       "J. L.
Traeff", title = "Portable Randomized List Ranking on Multiprocessors Using {MPI}", journal = j-LECT-NOTES-COMP-SCI, volume = "1497", pages = "395--??", year = "1998", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Jan 5 08:21:58 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Wismueller:1998:LMS, author = "R. Wismueller", title = "On-Line Monitoring Support in {PVM} and {MPI}", journal = j-LECT-NOTES-COMP-SCI, volume = "1497", pages = "312--??", year = "1998", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Jan 5 08:21:58 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @InProceedings{Yalamanchilli:1998:CPJ, author = "Narendar Yalamanchilli and William Cohen", title = "Communication Performance of {Java} based {Parallel Virtual Machines}", crossref = "ACM:1998:AWJ", pages = "??--??", year = "1998", bibdate = "Thu Apr 27 10:43:08 2000", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.cs.ucsb.edu/conferences/java98/papers/passing.pdf; http://www.cs.ucsb.edu/conferences/java98/papers/passing.ps", acknowledgement = ack-nhfb, } @Article{Zhou:1998:LST, author = "Honbo Zhou and Al Geist", title = "{LPVM}: a step towards multithread {PVM}", journal = j-CPE, volume = "10", number = "5", pages = "407--416", day = "25", month = apr, year = "1998", CODEN = "CPEXEI", ISSN = "1040-3108", ISSN-L = "1040-3108", bibdate = "Tue Sep 7 06:06:40 MDT 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www3.interscience.wiley.com/journalfinder.html", URL = "http://www3.interscience.wiley.com/cgi-bin/abstract?ID=5385; http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=5385&PLACEBO=IE.pdf", acknowledgement = 
ack-nhfb, fjournal = "Concurrency, practice and experience", } @InProceedings{Alexandrov:1999:PMC, author = "V. Alexandrov and A. Karaivanova", title = "Parallel {Monte Carlo} algorithms for sparse {SLAE} using {MPI}", crossref = "Dongarra:1999:RAP", number = "1697", pages = "283--290", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Article{Anonymous:1999:BRMa, author = "Anonymous", title = "Book Review: {{\booktitle{MPI --- The complete reference: Volume 1, the MPI core}}, second edition: By Marc Snir, Steve Otto, Steven Huss-Lederman, David Walker and Jack Dongarra. MIT Press, Cambridge, MA. (1998). 426 pages. \$35.00}", journal = j-COMPUT-MATH-APPL, volume = "37", number = "3", pages = "130--130", month = feb, year = "1999", CODEN = "CMAPDK", ISSN = "0898-1221 (print), 1873-7668 (electronic)", ISSN-L = "0898-1221", bibdate = "Wed Mar 1 21:48:57 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/computmathappl1990.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0898122199903590", acknowledgement = ack-nhfb, fjournal = "Computers and Mathematics with Applications", journal-URL = "http://www.sciencedirect.com/science/journal/08981221", } @Article{Anonymous:1999:BRMb, author = "Anonymous", title = "Book Review: {{\booktitle{MPI-The complete reference: Volume 2, the MPI-2 extensions}}: By William Gropp, Steven Huss-Lederman, Andrew Lumsdaine, Ewing Lusk, Bill Nitzberg, William Saphir and Marc Snir. MIT Press, Cambridge, MA. (1998). 344 pages. 
\$35.00}", journal = j-COMPUT-MATH-APPL, volume = "37", number = "3", pages = "130--130", month = feb, year = "1999", CODEN = "CMAPDK", ISSN = "0898-1221 (print), 1873-7668 (electronic)", ISSN-L = "0898-1221", bibdate = "Wed Mar 1 21:48:57 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/computmathappl1990.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0898122199903619", acknowledgement = ack-nhfb, fjournal = "Computers and Mathematics with Applications", journal-URL = "http://www.sciencedirect.com/science/journal/08981221", } @Article{Anonymous:1999:BRMf, author = "Anonymous", title = "Book Review: {{\booktitle{MPI --- The complete reference: Volume 1, the MPI core}}, second edition: By Marc Snir, Steve Otto, Steven Huss-Lederman, David Walker and Jack Dongarra. MIT Press, Cambridge, MA (1998). 426 pages. \$35.00}", journal = j-COMPUT-MATH-APPL, volume = "37", number = "6", pages = "130--130", month = mar, year = "1999", CODEN = "CMAPDK", ISSN = "0898-1221 (print), 1873-7668 (electronic)", ISSN-L = "0898-1221", bibdate = "Wed Mar 1 21:48:58 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/computmathappl1990.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0898122199902237", acknowledgement = ack-nhfb, fjournal = "Computers and Mathematics with Applications", journal-URL = "http://www.sciencedirect.com/science/journal/08981221", } @Article{Anonymous:1999:BRMg, author = "Anonymous", title = "Book Review: {{\booktitle{MPI-The complete reference: Volume 2, the MPI-2 extensions}}: By William Gropp, Steven Huss-Lederman, Andrew Lumsdaine, Ewing Lusk, Bill Nitzberg, William Saphir and Marc Snir. MIT Press, Cambridge, MA. (1998). 344 pages. 
\$35.00}", journal = j-COMPUT-MATH-APPL, volume = "37", number = "6", pages = "130--130", month = mar, year = "1999", CODEN = "CMAPDK", ISSN = "0898-1221 (print), 1873-7668 (electronic)", ISSN-L = "0898-1221", bibdate = "Wed Mar 1 21:48:58 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/computmathappl1990.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0898122199902250", acknowledgement = ack-nhfb, fjournal = "Computers and Mathematics with Applications", journal-URL = "http://www.sciencedirect.com/science/journal/08981221", } @InProceedings{Asai:1999:MIF, author = "Noboru Asai and Thomas Kentemich and Pierre Lagier", title = "{MPI-2} Implementation on a {Fujitsu Generic Message Passing Kernel}", crossref = "ACM:1999:SPO", pages = "??--??", year = "1999", bibdate = "Thu Feb 24 09:02:57 2000", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sc99.org/techpapers/", acknowledgement = ack-nhfb, } @InProceedings{Ayguade:1999:EML, author = "E. Ayguade and X. Martorell and J. Labarta and M. Gonzalez and N. Navarro", editor = "????", booktitle = "{Proceedings of the 1999 International Conference on Parallel Processing}", title = "Exploiting multiple levels of parallelism in {OpenMP}: a case study", publisher = pub-IEEE, address = pub-IEEE:adr, pages = "172--180", year = "1999", bibdate = "Mon Oct 07 08:57:41 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Badia:1999:SIT, author = "J. M. Badia and A. M. Vidal", title = "Solving the inverse {Toeplitz} eigenproblem using {ScaLAPACK} and {MPI}", crossref = "Dongarra:1999:RAP", number = "1697", pages = "372--379", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Article{Baker:1999:MOO, author = "M. Baker and B. Carpenter and G. 
Fox and Sung Hoon Koo", title = "{mpiJava}: An Object-Oriented {Java} Interface to {MPI}", journal = j-LECT-NOTES-COMP-SCI, volume = "1586", pages = "748--??", year = "1999", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Mon Sep 13 16:57:02 MDT 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs1999a.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @InProceedings{Baraglia:1999:PAN, author = "R. Baraglia and R. Ferrini and D. Laforenza and A. Lagana", title = "Parallel approaches to a numerically intensive application using {PVM}", crossref = "Dongarra:1999:RAP", number = "1697", pages = "364--371", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Article{Barbosa:1999:ADM, author = "J. Barbosa and A. Padilha", title = "Algorithm-Dependant Method to Determine the Optimal Number of Computers in Parallel Virtual Machines", journal = j-LECT-NOTES-COMP-SCI, volume = "1573", pages = "508--521", year = "1999", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Sep 14 06:09:05 MDT 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs1999a.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", keywords = "parallel processing; VECPAR; vector processing", } @Article{Barnard:1999:MIS, author = "Stephen T. Barnard and Luis M. Bernardo and Horst D. 
Simon",
  title =        "An {MPI} Implementation of the {SPAI} Preconditioner on
                 the {T3E}",
  journal =      j-IJHPCA,
  volume =       "13",
  number =       "2",
  pages =        "107--123",
  month =        "Summer",
  year =         "1999",
  CODEN =        "IHPCFL",
  ISSN =         "1094-3420 (print), 1741-2846 (electronic)",
  ISSN-L =       "1094-3420",
  bibdate =      "Fri May 21 13:56:09 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of High Performance Computing
                 Applications",
  journal-URL =  "http://hpc.sagepub.com/content/by/year",
}

%%% Barnard:1999:MIS: fjournal spells out the j-IJHPCA abbreviation, as
%%% is done elsewhere in this file for the same journal.

@InProceedings{Bassomo:1999:PGE,
  author =       "P. Bassomo and I. Sakho and A. Corbel",
  title =        "Porting generalized eigenvalue software on distributed
                 memory machines using systolic model principles",
  crossref =     "Dongarra:1999:RAP",
  number =       "1697",
  pages =        "396--403",
  year =         "1999",
  bibdate =      "Thu Dec 9 06:08:35 MST 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}

@Article{Bernaschi:1999:ERA,
  author =       "M. Bernaschi and G. Iannello and M. Lauria",
  title =        "Experimental Results about {MPI} Collective
                 Communication Operations",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1593",
  pages =        "774--??",
  year =         "1999",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Mon Sep 13 16:57:02 MDT 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/lncs1999a.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}

@InProceedings{Bertozzi:1999:MIT,
  author =       "M. Bertozzi and F. Boselli and G. Conte and M. Reggiani",
  title =        "An {MPI} implementation on the top of the virtual
                 interface architecture",
  crossref =     "Dongarra:1999:RAP",
  number =       "1697",
  pages =        "199--206",
  year =         "1999",
  bibdate =      "Thu Dec 9 06:08:35 MST 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}

@InProceedings{Beyls:1999:JJP,
  author =       "K. Beyls and E. D'Hollander and Y.
Yu",
  title =        "{JPT}: a {Java} parallelization tool",
  crossref =     "Dongarra:1999:RAP",
  number =       "1697",
  pages =        "173--180",
  year =         "1999",
  bibdate =      "Thu Dec 9 06:08:35 MST 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}

@InProceedings{Blaheta:1999:LFM,
  author =       "R. Blaheta and O. Jakl and J. Stary",
  title =        "Large-scale {FE} modelling in geomechanics: a case study
                 in parallelization",
  crossref =     "Dongarra:1999:RAP",
  number =       "1697",
  pages =        "299--306",
  year =         "1999",
  bibdate =      "Thu Dec 9 06:08:35 MST 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}

@InProceedings{Borkowski:1999:LVC,
  author =       "J. Borkowski",
  title =        "On line visualization or combining the standard {ORNL
                 PVM} with a vendor {PVM} implementation",
  crossref =     "Dongarra:1999:RAP",
  number =       "1697",
  pages =        "157--164",
  year =         "1999",
  bibdate =      "Thu Dec 9 06:08:35 MST 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}

@InProceedings{Boudet:1999:PIH,
  author =       "V. Boudet and F. Rastello and Y. Robert",
  title =        "{PVM} implementation of heterogeneous {ScaLAPACK} dense
                 linear solvers",
  crossref =     "Dongarra:1999:RAP",
  number =       "1697",
  pages =        "333--340",
  year =         "1999",
  bibdate =      "Thu Dec 9 06:08:35 MST 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}

%%% Bova:1999:NOM: the title carries no terminal period, because
%%% bibliography styles supply the punctuation after a title themselves.

@InProceedings{Bova:1999:NOM,
  author =       "S. W. Bova and C. P. Breshears and C. Cuicchi and Z.
                 Demirbilek and H. Gabb",
  editor =       "????",
  booktitle =    "{Proceedings of the ISCA 12th International Conference.
                 Parallel and Distributed Systems}",
  title =        "Nesting {OpenMP} in an {MPI} application",
  publisher =    "ISCA",
  address =      "Raleigh, NC, USA",
  pages =        "566--571",
  year =         "1999",
  bibdate =      "Mon Oct 07 09:02:21 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}

@Article{Bova:1999:PPM,
  author =       "Steve W. Bova and Clay P.
Breshears and Henry Gabb and Rudolf Eigenmann and Greg Gaertner and Bob Kuhn and Bill Magro and Stefano Salvini", title = "Parallel Programming with Message Passing and Directives", journal = j-SIAM-NEWS, volume = "32", number = "9", pages = "??--??", month = nov, year = "1999", ISSN = "0036-1437", bibdate = "Mon Oct 07 09:13:31 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "SIAM News", journal-URL = "http://www.siam.org/news/", } @InProceedings{Bubak:1999:EFP, author = "M. Bubak and W. Funika and K. Iskra and R. Maruszewski", title = "Enhancing the functionality of performance measurement tools for message passing environments", crossref = "Dongarra:1999:RAP", number = "1697", pages = "67--74", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Bubak:1999:TPR, author = "M. Bubak and P. Luszczek", title = "Towards portable runtime support for irregular and out-of-core computations", crossref = "Dongarra:1999:RAP", number = "1697", pages = "59--66", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Article{Cappello:1999:PNB, author = "F. Cappello and O. Richard and D. 
Etiemble", title = "Performance of the {NAS} Benchmarks on a Cluster of {SMP PCs} Using a Parallelization of the {MPI} Programs with {OpenMP}", journal = j-LECT-NOTES-COMP-SCI, volume = "1662", pages = "339--350", year = "1999", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Mon Sep 13 16:57:02 MDT 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs1999b.bib; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Cerin:1999:DMP, author = "C. Cerin", title = "Differentiating Message Passing Interface and Bulk Synchronous Parallel Computation Models", journal = j-LECT-NOTES-COMP-SCI, volume = "1662", pages = "477--??", year = "1999", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Mon Sep 13 16:57:02 MDT 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs1999b.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @InProceedings{Chapman:1999:EOF, author = "B. Chapman and P. Mehrotra and H. Zima", editor = "????", booktitle = "{Proceedings of Eighth ECMWF Workshop on the Use of Parallel Processors in Meteorology. Towards Teracomputing}", title = "Enhancing {OpenMP} with features for locality control", publisher = pub-WORLD-SCI, address = pub-WORLD-SCI:adr, pages = "301--313", year = "1999", bibdate = "Mon Oct 07 09:10:58 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{ChassindeKergommeaux:1999:MER, author = "J. {Chassin de Kergommeaux} and M. Ronsse and K. 
{De Bosschere}",
  title =        "{MPL0*}: {Efficient} record\slash replay of
                 nondeterministic features of message passing libraries",
  crossref =     "Dongarra:1999:RAP",
  number =       "1697",
  pages =        "141--148",
  year =         "1999",
  bibdate =      "Thu Dec 9 06:08:35 MST 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}

@InProceedings{Chaussumier:1999:ACM,
  author =       "F. Chaussumier and F. Desprez and L. Prylli",
  title =        "Asynchronous communications in {MPI} --- The {BIP\slash
                 Myrinet} approach",
  crossref =     "Dongarra:1999:RAP",
  number =       "1697",
  pages =        "485--492",
  year =         "1999",
  bibdate =      "Thu Dec 9 06:08:35 MST 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}

@InProceedings{Chergui:1999:UPP,
  author =       "J. Chergui",
  title =        "Using {PMD} to parallel solve large-scale
                 {Navier--Stokes} equations. Performance analysis on
                 {SGI\slash CRAY-T3E} machine",
  crossref =     "Dongarra:1999:RAP",
  number =       "1697",
  pages =        "341--348",
  year =         "1999",
  bibdate =      "Thu Dec 9 06:08:35 MST 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}

%%% Chien:1999:DEH: fjournal spells out the j-IJHPCA abbreviation, as
%%% is done elsewhere in this file for the same journal.

@Article{Chien:1999:DEH,
  author =       "A. Chien and M. Lauria and R. Pennington and M.
                 Showerman and G. Iannello and M. Buchanan and K.
                 Connelly and L. Giannini and G. Koenig and S.
                 Krishnamurthy and Q. Liu and S. Pakin and G. Sampemane",
  title =        "Design and Evaluation of an {HPVM}-Based {Windows NT}
                 Supercomputer",
  journal =      j-IJHPCA,
  volume =       "13",
  number =       "3",
  pages =        "201--219",
  month =        "Fall",
  year =         "1999",
  CODEN =        "IHPCFL",
  ISSN =         "1094-3420 (print), 1741-2846 (electronic)",
  ISSN-L =       "1094-3420",
  bibdate =      "Wed Jul 28 14:14:38 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of High Performance Computing
                 Applications",
  journal-URL =  "http://hpc.sagepub.com/content/by/year",
}

@InProceedings{Ciegis:1999:HDA,
  author =       "R. Ciegis and R. Sablinskas and J.
Wasniewski", title = "Hyper-Rectangle distribution algorithm for parallel multidimensional numerical integration", crossref = "Dongarra:1999:RAP", number = "1697", pages = "275--282", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Claver:1999:PCS, author = "J. M. Claver and M. Mollar and V. Hernandez", title = "Parallel computation of the {SVD} of a matrix product", crossref = "Dongarra:1999:RAP", number = "1697", pages = "388--395", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Clematis:1999:EPC, author = "A. Clematis and V. Gianuzzi", title = "Extending {PVM} with consistent cut capabilities: {Application} aspects and implementation strategies", crossref = "Dongarra:1999:RAP", number = "1697", pages = "101--108", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Corbacho-Lozano:1999:EDD, author = "J. Corbacho-Lozano and O.-I. Lepe-Aldama and J. Sole-Pareta and J. Domingo-Pascual", title = "Experiences deploying a distributed parallel processing environment over a broadband multiservice network", crossref = "Dongarra:1999:RAP", number = "1697", pages = "477--484", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Article{Cormen:1999:PBP, author = "Thomas H. Cormen and James C. 
Clippinger", title = "Performing {BMMC} Permutations Efficiently on Distributed-Memory Multiprocessors with {MPI}", journal = j-ALGORITHMICA, volume = "24", number = "3--4", pages = "349--370", month = aug, year = "1999", CODEN = "ALGOEJ", ISSN = "0178-4617 (print), 1432-0541 (electronic)", ISSN-L = "0178-4617", MRclass = "68Q22", MRnumber = "MR1687275", bibdate = "Fri Jan 6 11:38:11 MST 2006", bibsource = "dblp-journals-algorithmica.bib; http://dblp.uni-trier.de/db/journals/algorithmica/algorithmica24.html#CormenC99; http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0178-4617&volume=24&issue=3; https://www.math.utah.edu/pub/tex/bib/index-table-a.html#algorithmica; https://www.math.utah.edu/pub/tex/bib/pvm.bib; MathSciNet database", URL = "http://link.springer.de/link/service/journals/00453/bibs/24n3p349.html; http://www.springerlink.com/openurl.asp?genre=article&issn=0178-4617&volume=24&issue=3&spage=349", acknowledgement = ack-nhfb, fjournal = "Algorithmica. An International Journal in Computer Science", journal-URL = "http://link.springer.com/journal/453", oldlabel = "CormenC99", XMLdata = "ftp://ftp.informatik.uni-trier.de/pub/users/Ley/bib/records.tar.gz#journals/algorithmica/CormenC99", } @InProceedings{Cownie:1999:SID, author = "J. Cownie and W. Gropp", title = "A standard interface for debugger access to message queue information in {MPI}", crossref = "Dongarra:1999:RAP", number = "1697", pages = "51--58", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Czarnul:1999:DAP, author = "P. Czarnul and H. 
Krawczyk", title = "Dynamic assignment with process migration in distributed environments", crossref = "Dongarra:1999:RAP", number = "1697", pages = "509--516", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Article{Dan:1999:QAM, author = "Pei Dan and Wang Dongsheng and Zhang Youhui and Shen Meiming", title = "Quasi-asynchronous migration: a novel migration protocol for {PVM} tasks", journal = j-OPER-SYS-REV, volume = "33", number = "2", pages = "5--14", month = apr, year = "1999", CODEN = "OSRED8", ISSN = "0163-5980 (print), 1943-586X (electronic)", ISSN-L = "0163-5980", bibdate = "Sat Aug 26 08:55:42 MDT 2006", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Operating Systems Review", } @InProceedings{DeSande:1999:NBS, author = "F. {De Sande} and C. Leon and C. Rodriguez and J. Roda", title = "Nested bulk synchronous parallel computing", crossref = "Dongarra:1999:RAP", number = "1697", pages = "189--198", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Eberl:1999:PCP, author = "M. Eberl and W. Karl and C. Trinitis and A. Blaszczyk", title = "Parallel computing on {PC} clusters --- an alternative to supercomputers for industrial applications", crossref = "Dongarra:1999:RAP", number = "1697", pages = "493--498", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Eickermann:1999:PID, author = "T. Eickermann and H. Grund and J. 
Henrichs", title = "Performance issues of distributed {MPI} applications in a {German} gigabit testbed", crossref = "Dongarra:1999:RAP", number = "1697", pages = "3--10", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Espinosa:1999:REB, author = "A. Espinosa and F. Parcerisa and T. Margalef and E. Luque", title = "Relating the execution behaviour with the structure of the application", crossref = "Dongarra:1999:RAP", number = "1697", pages = "91--100", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Article{Fang:1999:PMD, author = "Zhiwu Fang and A. D. J. Haymet and Wataru Shinoda and Susumu Okazaki", title = "Parallel molecular dynamics simulation: Implementation of {PVM} for a lipid membrane", journal = j-COMP-PHYS-COMM, volume = "116", number = "2--3", pages = "295--310", month = feb, year = "1999", CODEN = "CPHCBZ", DOI = "https://doi.org/10.1016/S0010-4655(98)00089-7", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Mon Feb 13 21:30:34 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm1990.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465598000897", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @InProceedings{Fava:1999:MPI, author = "A. Fava and M. Fava and M. Bertozzi", title = "{MPIPOV}: a parallel implementation of {POV-Ray} based on {MPI}", crossref = "Dongarra:1999:RAP", number = "1697", pages = "426--433", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Ferenc:1999:VMK, author = "D. Ferenc and J. 
Nabrzyski and M. Stroinski and P. Wierzejewski", title = "Visual {MPI}, a knowledge-based system for writing efficient {MPI} applications", crossref = "Dongarra:1999:RAP", number = "1697", pages = "257--266", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Fernandez:1999:PGP, author = "F. Fernandez and J. M. Sanchez and M. Tomassini and J. A. Gomez", title = "A parallel genetic programming tool based on {PVM}", crossref = "Dongarra:1999:RAP", number = "1697", pages = "241--248", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Frugoli:1999:DCH, author = "G. Frugoli and A. Fava and E. Fava and G. Conte", title = "Distributed collision handling for particle-based simulation", crossref = "Dongarra:1999:RAP", number = "1697", pages = "410--417", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Gallud:1999:CCU, author = "J. A. Gallud and J. M. Garcia and J. Garcia-Consuegra", title = "Cluster computing using {MPI} and {Windows NT} to solve the processing of remotely sensed imagery", crossref = "Dongarra:1999:RAP", number = "1697", pages = "442--449", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Article{Gallud:1999:DPR, author = "J. A. Gallud and J. Garcia-Consuegra and A. 
Martinez", title = "Distributed Processing of Remotely Sensed {Landsat-TM} Imagery Using {MPI}", journal = j-PARALLEL-DIST-COMP-PRACT, volume = "2", number = "2", pages = "??--??", month = "????", year = "1999", CODEN = "????", ISSN = "1097-2803", bibdate = "Fri Dec 19 08:14:13 MST 2003", bibsource = "http://www.cs.okstate.edu/~pdcp/vols/vol02/vol02no2.html; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.cs.okstate.edu/~pdcp/vols/vol02/vol02no2abs.html#gallud", acknowledgement = ack-nhfb, fjournal = "PDCP: Parallel and Distributed Computing Practices", } @InProceedings{Garcia:1999:MMI, author = "F. Garcia and A. Calderon and J. Carretero", title = "{MiMPI}: a multithread-safe implementation of {MPI}", crossref = "Dongarra:1999:RAP", number = "1697", pages = "207--214", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Garzon:1999:PIE, author = "E. M. Garzon and I. Garcia", title = "A parallel implementation of the eigenproblem for large, symmetric and sparse matrices", crossref = "Dongarra:1999:RAP", number = "1697", pages = "380--387", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Getov:1999:MJM, author = "Vladimir Getov and Paul Gray and Vaidy Sunderam", title = "{MPI} and {Java-MPI}: Contrasts and Comparisons of Low-level Communication Performance", crossref = "ACM:1999:SPO", pages = "??--??", year = "1999", bibdate = "Thu Feb 24 09:02:57 2000", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sc99.org/techpapers/", acknowledgement = ack-nhfb, } @Article{Giordano:1999:IBP, author = "M. Giordano and M. M. Furnari and F. 
Vitobello", title = "Interaction between {PVM} Parameters and Communication Performances on {ATM} Networks", journal = j-LECT-NOTES-COMP-SCI, volume = "1557", pages = "586--587", year = "1999", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Sep 14 06:09:05 MDT 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs1999a.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", keywords = "image processing; multimedia; parallel computation; parallel computing; parallel numerics; ParNum", } @InProceedings{Godlevsky:1999:PSA, author = "A. Godlevsky and M. Gazak and L. Hluchy", title = "Parallelizing of sequential annotated programs in {PVM} environment", crossref = "Dongarra:1999:RAP", number = "1697", pages = "517--524", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Article{Golebiewski:1999:HPI, author = "M. Golebiewski and M. Baum and R. Hempel", title = "High Performance Implementation of {MPI} for {Myrinet}", journal = j-LECT-NOTES-COMP-SCI, volume = "1557", pages = "510--521", year = "1999", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Sep 14 06:09:05 MDT 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs1999a.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", keywords = "image processing; multimedia; parallel computation; parallel computing; parallel numerics; ParNum", } @InProceedings{Gonzalez:1999:PPM, author = "J. A. Gonzalez and C. Rodriguez and J. L. Roda and D. G. 
Morales", title = "Performance and predictability of {MPI} and {BSP} programs on the {CRAY T3E}", crossref = "Dongarra:1999:RAP", number = "1697", pages = "27--34", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Gropp:1999:RMM, author = "W. Gropp and E. Lusk", title = "Reproducible measurements of {MPI} performance characteristics", crossref = "Dongarra:1999:RAP", number = "1697", pages = "11--18", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Book{Gropp:1999:UMA, author = "William Gropp and Ewing Lusk and Rajeev Thakur", title = "Using {MPI-2}: Advanced Features of the {Message Passing Interface}", publisher = pub-MIT, address = pub-MIT:adr, pages = "275", month = nov, year = "1999", ISBN = "0-262-57133-1", ISBN-13 = "978-0-262-57133-3", LCCN = "QA76.642 .G762 1999", bibdate = "Fri Feb 01 06:52:50 2002", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", price = "US\$32.50", series = "Scientific and Engineering Computation", URL = "http://www.mitpress.com/book-home.tcl?isbn=0262571331", acknowledgement = ack-nhfb, } @Book{Gropp:1999:UMP, author = "William Gropp and Ewing Lusk and Anthony Skjellum", title = "Using {MPI}: Portable Parallel Programming with the {Message Passing Interface}", publisher = pub-MIT, address = pub-MIT:adr, edition = "Second", pages = "350", month = nov, year = "1999", ISBN = "0-262-57132-3 (vol. 1), 0-262-57134-X (set)", ISBN-13 = "978-0-262-57132-6 (vol. 
1), 978-0-262-57134-0 (set)", LCCN = "QA76.642.G76 1999", bibdate = "Mon Sep 20 05:54:39 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", price = "US\$32.50", series = "Scientific and Engineering Computation", URL = "http://www.mitpress.com/book-home.tcl?isbn=0262571323", acknowledgement = ack-nhfb, } @Article{Hempel:1999:AMP, author = "Rolf Hempel and Falk Zimmermann", title = "Automatic migration from {PARMACS} to {MPI} in parallel {Fortran} applications", journal = j-SCI-PROG, volume = "7", number = "1", pages = "39--46", month = "????", year = "1999", CODEN = "SCIPEV", ISSN = "1058-9244 (print), 1875-919X (electronic)", ISSN-L = "1058-9244", bibdate = "Thu Mar 28 12:27:27 MST 2002", bibsource = "Compendex database; http://www.iospress.nl/site/html/10589244.html; https://www.math.utah.edu/pub/tex/bib/pvm.bib; OCLC Article1st database", URL = "http://iospress.metapress.com/app/home/contribution.asp%3Fwasp=64cr5a4mg33tuhcbdr02%26referrer=parent%26backto=issue%2C3%2C7%3Bjournal%2C8%2C9%3Blinkingpublicationresults%2C1%2C1", acknowledgement = ack-nhfb, fjournal = "Scientific Programming", journal-URL = "http://iospress.metapress.com/content/1058-9244", } @InProceedings{Hidalgo:1999:MMP, author = "J. I. Hidalgo and M. Prieto and J. Lanchares and F. Tirado", title = "A method for model parameter identification using parallel genetic algorithms", crossref = "Dongarra:1999:RAP", number = "1697", pages = "291--298", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Hluchy:1999:GWF, author = "L. Hluchy and V. D. Tran and L. Halada and M. 
Dobrucky", title = "Ground water flow modelling in {PVM}", crossref = "Dongarra:1999:RAP", number = "1697", pages = "450--460", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Huse:1999:CCD, author = "L. P. Huse", title = "Collective communication on dedicated clusters of workstations", crossref = "Dongarra:1999:RAP", number = "1697", pages = "469--476", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Ishihara:1999:VBS, author = "S. Ishihara and S. Tani and A. Takahara", title = "Virtual {BUS}: a simple implementation of an effortless networking system based on {PVM}", crossref = "Dongarra:1999:RAP", number = "1697", pages = "461--468", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Article{Kielmann:1999:MMC, author = "Thilo Kielmann and Rutger F. H. Hofman and Henri E. Bal and Aske Plaat and Raoul A. F. Bhoedjang", title = "{MagPIe}: {MPI}'s collective communication operations for clustered wide area systems", journal = j-SIGPLAN, volume = "34", number = "8", pages = "131--140", month = aug, year = "1999", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sun Dec 14 09:18:06 MST 2003", bibsource = "http://www.acm.org/pubs/contents/proceedings/ppopp/301104/; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.acm.org/pubs/citations/proceedings/ppopp/301104/p131-kielmann/", acknowledgement = ack-nhfb, fjournal = "ACM SIG{\-}PLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", } @Article{Koholka:1999:MPR, author = "R. Koholka and H. Mayer and A. 
Goller", title = "{MPI}-parallelized Radiance on {SGI CoW} and {SMP}", journal = j-LECT-NOTES-COMP-SCI, volume = "1557", pages = "549--558", year = "1999", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Sep 14 06:09:05 MDT 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs1999a.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", keywords = "image processing; multimedia; parallel computation; parallel computing; parallel numerics; ParNum", } @InProceedings{Kranzlmueller:1999:MOM, author = "D. Kranzlmueller and R. Reussner and C. Schaubschlaeger", title = "Monitor overhead measurement with {SKaMPI}", crossref = "Dongarra:1999:RAP", number = "1697", pages = "43--50", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Larsen:1999:SPG, author = "M. Larsen and P. Madsen", title = "A scalable parallel {Gauss--Seidel} and {Jacobi} solver for animal genetics", crossref = "Dongarra:1999:RAP", number = "1697", pages = "356--363", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Article{Lee:1999:PEJ, author = "Bu-Sung Lee and Yan Gu and Wentong Cai and Alfred Heng", title = "Performance Evaluation of {JPVM}", journal = j-PARALLEL-PROCESS-LETT, volume = "9", number = "3", pages = "401--??", month = sep, year = "1999", CODEN = "PPLTEE", ISSN = "0129-6264 (print), 1793-642X (electronic)", bibdate = "Thu Jan 6 12:02:35 MST 2005", bibsource = "http://ejournals.wspc.com.sg/ppl/; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Parallel Processing Letters", journal-URL = "http://www.worldscientific.com/loi/ppl", } @Article{Luo:1999:SMV, author = "Yong Luo", title = "Shared Memory vs. 
Message Passing: The {COMOPS} Benchmark Experiment", journal = j-J-SUPERCOMPUTING, volume = "13", number = "3", pages = "283--301", month = may, year = "1999", CODEN = "JOSUED", DOI = "https://doi.org/10.1023/A:1008009103962", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Wed Jul 6 12:13:10 MDT 2005", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=13&issue=3; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.wkap.nl/issuetoc.htm/0920-8542+13+3+1999", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=13&issue=3&spage=283; http://www.wkap.nl/oasis.htm/206582", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", keywords = "distributed computing; message passing; MPI; performance evaluation; shared memory", } @InProceedings{MacFarlane:1999:PPI, author = "A. MacFarlane and J. A. McCann and S. E. Robertson", title = "{PLIERS}: a parallel information retrieval system using {MPI}", crossref = "Dongarra:1999:RAP", number = "1697", pages = "317--324", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Matuszek:1999:BPG, author = "M. R. Matuszek and A. Mazurkiewicz and P. W. Uminski", title = "Benchmarking the {PVM} group communication efficiency", crossref = "Dongarra:1999:RAP", number = "1697", pages = "499--508", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Mierendorff:1999:PMB, author = "H. Mierendorff and H. 
Schwamborn", title = "Performance modeling based on {PVM}", crossref = "Dongarra:1999:RAP", number = "1697", pages = "75--82", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Migliardi:1999:PEH, author = "M. Migliardi and V. Sunderam", title = "{PVM} emulation in the harness metacomputing system: a plug-in based approach", crossref = "Dongarra:1999:RAP", number = "1697", pages = "117--124", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Morimoto:1999:PEM, author = "K. Morimoto and T. Matsumoto and K. Hiraki", title = "Performance evaluation of the {MPI\slash MBCF} with the {NAS} parallel benchmarks", crossref = "Dongarra:1999:RAP", number = "1697", pages = "19--26", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Morrison:1999:FPP, author = "J. P. Morrison and R. W. Connolly", title = "Facilitating parallel programming in {PVM} using condensed graphs", crossref = "Dongarra:1999:RAP", number = "1697", pages = "181--188", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Mourao:1999:IMO, author = "F. E. Mourao and J. G. Silva", title = "Implementing {MPI}'s one-sided communications for {WMPI}", crossref = "Dongarra:1999:RAP", number = "1697", pages = "231--240", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Neyman:1999:ERP, author = "M. Neyman and M. Bukowski and P. 
Kuzora", title = "Efficient replay of {PVM} programs", crossref = "Dongarra:1999:RAP", number = "1697", pages = "83--90", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Nicolescu:1999:PWA, author = "C. Nicolescu and B. Albers and P. Jonker", title = "Parallel watershed algorithm on images from cranial {CT-scans} using {PVM} and {MPI} on a distributed memory system", crossref = "Dongarra:1999:RAP", number = "1697", pages = "418--425", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Papagapiou:1999:NWD, author = "A. Papagapiou and P. Evripidou and G. Samaras", title = "{Net-Console}: a {Web}-based development environment for {MPI} programs", crossref = "Dongarra:1999:RAP", number = "1697", pages = "249--256", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Parrilia:1999:UPD, author = "L. Parrilla and J. Ortega and A. Lloris", title = "Using {PVM} for distributed logic minimization in a network of computers", crossref = "Dongarra:1999:RAP", number = "1697", pages = "541--548", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Article{Pereira:1999:PBI, author = "N. S. A. 
Pereira", title = "A Parallel {$N$}-body Integrator Using {MPI}", journal = j-LECT-NOTES-COMP-SCI, volume = "1573", pages = "627--639", year = "1999", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Sep 14 06:09:05 MDT 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs1999a.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", keywords = "parallel processing; VECPAR; vector processing", } @InProceedings{Plazek:1999:IIC, author = "J. Plazek and K. Banas and J. Kitowski", title = "Implementation issues of computational fluid dynamics algorithms on parallel computers", crossref = "Dongarra:1999:RAP", number = "1697", pages = "349--355", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Prieto:1999:PRM, author = "M. Prieto and R. Santiago and I. M. Llorente and F. Tirado", title = "A parallel robust multigrid algorithm based on semi-coarsening", crossref = "Dongarra:1999:RAP", number = "1697", pages = "307--316", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Prylli:1999:DHP, author = "L. Prylli and B. Tourancheau and R. Westrelin", title = "The design for a high performance {MPI} implementation on the {Myrinet} network", crossref = "Dongarra:1999:RAP", number = "1697", pages = "223--230", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Rabenseifner:1999:APM, author = "R. 
Rabenseifner", title = "Automatic profiling of {MPI} applications with hardware performance counters", crossref = "Dongarra:1999:RAP", number = "1697", pages = "35--42", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Article{Radhakrishna:1999:MBP, author = "H. Radhakrishna and S. Divakar and N. Magotra and S. R. J. Brueck", title = "{MPI}-Based Parallel Implementation of a Lithography Pattern Simulation Algorithm", journal = j-LECT-NOTES-COMP-SCI, volume = "1593", pages = "109--??", year = "1999", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Mon Sep 13 16:57:02 MDT 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs1999a.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @TechReport{Roe:1999:PMI, author = "Kevin Roe and Piyush Mehrotra", title = "Parallelization of a multigrid incompressible viscous cavity flow solver using {openMP}", type = "{NASA} contractor report", number = "NASA\slash CR-1999-209551", institution = inst-NLRC, address = inst-NLRC:adr, pages = "????", year = "1999", bibdate = "Thu Mar 16 07:20:02 2000", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "Also ICASE report 99-36.", acknowledgement = ack-nhfb, keywords = "cavity flow; incompressible flow; multigrid methods; two dimensional flow; viscous flow", } @InProceedings{Rungsawang:1999:PDT, author = "A. Rungsawang and A. Tangpong and P. Laohawee", title = "Parallel {DSIR} text retrieval system", crossref = "Dongarra:1999:RAP", number = "1697", pages = "325--332", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Article{Russ:1999:UHR, author = "Samuel H. 
Russ and Jonathan Robinson and Matt Gleeson and Brad Meyers and Laxman Rajagopalan and Chun-Heong Tan", title = "Using {Hector} to run {MPI} programs over networked workstations", journal = j-CPE, volume = "11", number = "4", pages = "189--204", day = "10", month = apr, year = "1999", CODEN = "CPEXEI", ISSN = "1040-3108", ISSN-L = "1040-3108", bibdate = "Tue Sep 7 06:06:48 MDT 1999", bibsource = "http://www.interscience.wiley.com/jpages/1040-3108/; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www3.interscience.wiley.com/journalfinder.html", note = "Special Issue: Applications of Distributed Computing Environments.", URL = "http://www3.interscience.wiley.com/cgi-bin/abstract?ID=61004080; http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=61004080&PLACEBO=IE.pdf", acknowledgement = ack-nhfb, fjournal = "Concurrency, practice and experience", } @Article{Scherer:1999:TAP, author = "Alex Scherer and Honghui Lu and Thomas Gross and Willy Zwaenepoel", title = "Transparent adaptive parallelism on {NOWs} using {OpenMP}", journal = j-SIGPLAN, volume = "34", number = "8", pages = "96--106", month = aug, year = "1999", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sun Dec 14 09:18:06 MST 2003", bibsource = "http://www.acm.org/pubs/contents/proceedings/ppopp/301104/; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan1990.bib", URL = "http://www.acm.org/pubs/citations/proceedings/ppopp/301104/p96-scherer/", acknowledgement = ack-nhfb, fjournal = "ACM SIG{\-}PLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", } @InProceedings{Schuele:1999:HAP, author = "J. 
Schuele", title = "Heading for an asynchronous parallel ocean model", crossref = "Dongarra:1999:RAP", number = "1697", pages = "404--409", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @MastersThesis{Seifert:1999:ESI, author = "Friedrich Seifert", title = "{Entwicklung von Systemsoftware zur Integration der Virtual Interface Architecture (VIA) in den Linux Betriebssystemkern f{\"u}r optimiertes Message Passing}. ({German}) [{Development} of system software for integration of the {Virtual Interface Architecture (VIA)} in the {Linux} operating system for optimized message passing]", type = "{Diplomarbeit}", school = "Technische Universit{\"a}t Chemnitz-Zwickau", address = "Chemnitz, Germany", pages = "115", year = "1999", bibdate = "Wed Aug 27 06:25:09 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, language = "German", } @Article{Sen:1999:PBD, author = "Vikramaditya Sen and Mrinal K. Sen and Paul L. 
Stoffa", title = "{PVM} based {$3$-D Kirchhoff} depth migration using dynamically computed travel-times: an application in seismic data processing", journal = j-PARALLEL-COMPUTING, volume = "25", number = "3", pages = "231--248", day = "22", month = mar, year = "1999", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Fri Aug 6 10:16:02 MDT 1999", bibsource = "http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_free/browse/browse.cgi?year=1999&volume=25&issue=3; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.elsevier.com/cas/tree/store/parco/sub/1999/25/3/1389.pdf", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @InProceedings{Shen:1999:ATL, author = "Kai Shen and Hong Tang and Tao Yang", title = "Adaptive Two-level Thread Management for Fast {MPI} Execution on Shared Memory Machines", crossref = "ACM:1999:SPO", pages = "??--??", year = "1999", bibdate = "Thu Feb 24 09:02:57 2000", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sc99.org/techpapers/", acknowledgement = ack-nhfb, } @Article{Sidonio:1999:PBI, author = "N. S. A. Pereira", title = "A Parallel {$N$}-body Integrator Using {MPI}", journal = j-LECT-NOTES-COMP-SCI, volume = "1573", pages = "627--639", year = "1999", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Mon Sep 13 16:57:02 MDT 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs1999a.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @InProceedings{Silva:1999:DPP, author = "F. Silva and H. Paulino and L. 
Lopes", title = "{DipSystem}: a parallel programming system for distributed memory architectures", crossref = "Dongarra:1999:RAP", number = "1697", pages = "525--532", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Silva:1999:IME, author = "P. Silva and J. G. Silva", title = "Implementing {MPI-2} extended collective operations", crossref = "Dongarra:1999:RAP", number = "1697", pages = "125--132", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Sistare:1999:MSP, author = "Steve Sistare and Erica Dorenkamp and Nick Nevin", title = "{MPI} Support in the {Prism} Programming Environment", crossref = "ACM:1999:SPO", pages = "??--??", year = "1999", bibdate = "Thu Feb 24 09:02:57 2000", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sc99.org/techpapers/", acknowledgement = ack-nhfb, } @InProceedings{Sistare:1999:OMC, author = "Steve Sistare and Rolf vandeVaart and Eugene Loh", title = "Optimization of {MPI} Collectives on Clusters of Large-scale {SMPs}", crossref = "ACM:1999:SPO", pages = "??--??", year = "1999", bibdate = "Thu Feb 24 09:02:57 2000", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sc99.org/techpapers/", acknowledgement = ack-nhfb, } @InProceedings{Stankovic:1999:NVJ, author = "N. Stankovic and K. Zhang", title = "Native versus {Java} message passing", crossref = "Dongarra:1999:RAP", number = "1697", pages = "165--172", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Szeberenyi:1999:SGB, author = "I. Szeberenyi and G. 
Domokos", title = "Solving generalized boundary value problems with distributed computing and recursive programming", crossref = "Dongarra:1999:RAP", number = "1697", pages = "267--274", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Article{Takahashi:1999:IEM, author = "T. Takahashi and F. O'Carroll and H. Tezuka and A. Hori", title = "Implementation and Evaluation of {MPI} on an {SMP} Cluster", journal = j-LECT-NOTES-COMP-SCI, volume = "1586", pages = "1178--??", year = "1999", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Mon Sep 13 16:57:02 MDT 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs1999a.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Tang:1999:CRT, author = "Hong Tang and Kai Shen and Tao Yang", title = "Compile\slash run-time support for threaded {MPI} execution on multiprogrammed shared memory machines", journal = j-SIGPLAN, volume = "34", number = "8", pages = "107--118", month = aug, year = "1999", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sun Dec 14 09:18:06 MST 2003", bibsource = "http://www.acm.org/pubs/contents/proceedings/ppopp/301104/; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.acm.org/pubs/citations/proceedings/ppopp/301104/p107-tang/", acknowledgement = ack-nhfb, fjournal = "ACM SIG{\-}PLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", } @Article{Throop:1999:SOS, author = "Joe Throop", title = "Standards: {OpenMP}: Shared-Memory Parallelism from the Ashes", journal = j-COMPUTER, volume = "32", number = "5", pages = "108--109", month = may, year = "1999", CODEN = "CPTRB4", ISSN = "0018-9162 (print), 1558-0814 (electronic)", ISSN-L = "0018-9162", bibdate 
= "Thu May 6 06:17:23 MDT 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/computer1990.bib; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://dlib.computer.org/co/books/co1999/pdf/r5108.pdf", acknowledgement = ack-nhfb, fjournal = "Computer", journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=2", } @InProceedings{Tourino:1999:MMC, author = "J. Touri{\~n}o and R. Doallo", title = "Modeling {MPI} collective communications on the {AP3000 Multicomputer}", crossref = "Dongarra:1999:RAP", number = "1697", pages = "133--140", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Traeff:1999:FFE, author = "J. L. Traeff and R. Hempel and H. Ritzdorf and F. Zimmermann", title = "Flattening on the fly: {Efficient} handling of {MPI} derived datatypes", crossref = "Dongarra:1999:RAP", number = "1697", pages = "109--116", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Vazquez:1999:PNS, author = "G. E. Vazquez and N. B. Brignole", title = "Parallel {NLP} strategies using {PVM} on heterogeneous distributed environments", crossref = "Dongarra:1999:RAP", number = "1697", pages = "533--540", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Wisniewski:1999:SME, author = "Len Wisniewski and Brad Smisloff and Nils Nieuwejaar", title = "{Sun MPI I/O}: Efficient {I/O} for Parallel Applications", crossref = "ACM:1999:SPO", pages = "??--??", year = "1999", bibdate = "Thu Feb 24 09:02:57 2000", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sc99.org/techpapers/", acknowledgement = ack-nhfb, } @InProceedings{Wong:1999:BMM, author = "F. C. 
Wong and A. C. Arpaci-Dusseau and D. E. Culler", title = "Building {MPI} for multi-programming systems using implicit information", crossref = "Dongarra:1999:RAP", number = "1697", pages = "215--222", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Article{Wu:1999:JBD, author = "X. Wu and Q. Chen and X.-H. Sun", title = "A {Java}-based Distributed Debugger Supporting {MPI} and {PVM}", journal = j-PARALLEL-DIST-COMP-PRACT, volume = "2", number = "4", pages = "??--??", month = "????", year = "1999", CODEN = "????", ISSN = "1097-2803", bibdate = "Fri Dec 19 08:14:14 MST 2003", bibsource = "http://www.cs.okstate.edu/~pdcp/vols/vol02/vol02no4.html; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.cs.okstate.edu/~pdcp/vols/vol02/vol02no4abs.html#wu", acknowledgement = ack-nhfb, fjournal = "PDCP: Parallel and Distributed Computing Practices", } @InProceedings{Wu:1999:MCC, author = "P.-Y. 
Wu", title = "Minimum communication cost fractal image compression on {PVM}", crossref = "Dongarra:1999:RAP", number = "1697", pages = "434--441", year = "1999", bibdate = "Thu Dec 9 06:08:35 MST 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Article{Zaki:1999:TSP, author = "Omer Zaki and Ewing Lusk and William Gropp and Deborah Swider", title = "Toward Scalable Performance Visualization with {Jumpshot}", journal = j-IJHPCA, volume = "13", number = "3", pages = "277--288", month = "Fall", year = "1999", CODEN = "IHPCFL", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Wed Jul 28 14:14:38 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, journal-URL = "http://hpc.sagepub.com/content/by/year", keywords = "Java; MPI (Message Passing Interface) profiling", } @Article{Zoraja:1999:SPD, author = "Ivan Zoraja and Hermann Hellwagner and Vaidy Sunderam", title = "{SCIPVM}: {Parallel} distributed computing on {SCI} workstation clusters", journal = j-CPE, volume = "11", number = "3", pages = "121--138", month = mar, year = "1999", CODEN = "CPEXEI", ISSN = "1040-3108", ISSN-L = "1040-3108", bibdate = "Tue Sep 7 06:06:47 MDT 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www3.interscience.wiley.com/journalfinder.html", URL = "http://www3.interscience.wiley.com/cgi-bin/abstract?ID=61003667; http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=61003667&PLACEBO=IE.pdf", acknowledgement = ack-nhfb, fjournal = "Concurrency, practice and experience", } @Misc{Beguelin:19xx:PSS, author = "A. Beguelin and J. J. Dongarra and G. A. Geist and R. Manchek and V. S. 
Sunderam", title = "{PVM} Software System and Documentation", howpublished = "Email to {\tt netlib@ornl.gov}", month = "????", year = "19xx", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; Parallel/Par.Arch.Indep.bib", } @TechReport{Geist:19xx:NBC, author = "G. A. Geist and V. S. Sunderam", title = "Network Based Concurrent Computing on the {PVM} System", institution = inst-ORNL # " and " # inst-EMORY, address = inst-ORNL:adr # " and " # inst-EMORY:adr, year = "19xx", bibsource = "Distributed/Dist.Sys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", } @Article{Adhianto:2000:TOA, author = "L. Adhianto and F. Bodin and B. Chapman and L. Hascoet and A. Kneer and D. Lancaster and I. Wolton and M. Wirtz", title = "Tools for {OpenMP} application development: the {POST} project", journal = j-CPE, volume = "12", number = "12", pages = "1177--1191", month = oct, year = "2000", CODEN = "CPEXEI", DOI = "https://doi.org/10.1002/1096-9128(200010)12:12<1177::AID-CPE533>3.0.CO;2-V", ISSN = "1040-3108", ISSN-L = "1040-3108", bibdate = "Sat Apr 7 06:56:10 MDT 2001", bibsource = "http://www.interscience.wiley.com/jpages/1040-3108; https://www.math.utah.edu/pub/tex/bib/cpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www3.interscience.wiley.com/journalfinder.html", URL = "http://www3.interscience.wiley.com/cgi-bin/abstract/76500357/START; http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=76500357&PLACEBO=IE.pdf", acknowledgement = ack-nhfb, fjournal = "Concurrency, practice and experience", } @Article{Anonymous:2000:BRUd, author = "Anonymous", title = "Book Review: {{\booktitle{Using MPI-2: Advanced features of the message-passing interface}}: By William Gropp, Ewing Lusk and Rajeev Thakur. The MIT Press, Cambridge, MA. (1999). 382 pages. 
\$35 (each); \$60 (set)}", journal = j-COMPUT-MATH-APPL, volume = "40", number = "2--3", pages = "419--419", month = jul # "\slash " # aug, year = "2000", CODEN = "CMAPDK", ISSN = "0898-1221 (print), 1873-7668 (electronic)", ISSN-L = "0898-1221", bibdate = "Wed Mar 1 21:49:10 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/computmathappl2000.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0898122100902098", acknowledgement = ack-nhfb, fjournal = "Computers and Mathematics with Applications", journal-URL = "http://www.sciencedirect.com/science/journal/08981221", } @Article{Anonymous:2000:BRUe, author = "Anonymous", title = "Book Review: {{\booktitle{Using MPI: Portable parallel programming with the message-passing interface}}: Second edition. By William Gropp, Ewing Lusk and Anthony Skjellum. The MIT Press, Cambridge, MA. (1999). 371 pages. \$35 (each); \$60 (set)}", journal = j-COMPUT-MATH-APPL, volume = "40", number = "2--3", pages = "419--419", month = jul # "\slash " # aug, year = "2000", CODEN = "CMAPDK", ISSN = "0898-1221 (print), 1873-7668 (electronic)", ISSN-L = "0898-1221", bibdate = "Wed Mar 1 21:49:10 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/computmathappl2000.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0898122100902074", acknowledgement = ack-nhfb, fjournal = "Computers and Mathematics with Applications", journal-URL = "http://www.sciencedirect.com/science/journal/08981221", } @Article{Armstrong:2000:QDB, author = "Brian Armstrong and Seon Wook Kim and Rudolf Eigenmann", title = "Quantifying Differences between {OpenMP} and {MPI} Using a Large-Scale Application Suite", journal = j-LECT-NOTES-COMP-SCI, volume = "1940", pages = "482--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 09:17:15 MST 2002", bibsource = 
"file://sunset.math.utah.edu/a/suncore0/export/home/0073/sy/beebe/tex/bib/lncs2000.bib; http://link.springer-ny.com/link/service/series/0558/tocs/t1940.htm; https://www.math.utah.edu/pub/tex/bib/lncs2000.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1940/19400482.htm; http://link.springer-ny.com/link/service/series/0558/papers/1940/19400482.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Astalos:2000:CMS, author = "J{\'a}n Astalos and Ladislav Hluch{\'y}", title = "{CIS} --- a Monitoring System for {PC} Clusters", journal = j-LECT-NOTES-COMP-SCI, volume = "1908", pages = "225--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:30:27 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080225.htm; http://link.springer-ny.com/link/service/series/0558/papers/1908/19080225.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Baiardi:2000:AMM, author = "Fabrizio Baiardi and Sarah Chiti and Paolo Mori and Laura Ricci", title = "Adaptive Multigrid Methods in {MPI}", journal = j-LECT-NOTES-COMP-SCI, volume = "1908", pages = "80--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:30:27 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080080.htm; http://link.springer-ny.com/link/service/series/0558/papers/1908/19080080.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Berrendorf:2000:PCO, 
author = "Rudolf Berrendorf and Guido Nieken", title = "Performance characteristics for {OpenMP} constructs on different parallel computer architectures", journal = j-CPE, volume = "12", number = "12", pages = "1261--1273", month = oct, year = "2000", CODEN = "CPEXEI", DOI = "https://doi.org/10.1002/1096-9128(200010)12:12<1261::AID-CPE525>3.0.CO;2-5", ISSN = "1040-3108", ISSN-L = "1040-3108", bibdate = "Sat Apr 7 06:56:10 MDT 2001", bibsource = "http://www.interscience.wiley.com/jpages/1040-3108; https://www.math.utah.edu/pub/tex/bib/cpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www3.interscience.wiley.com/journalfinder.html", URL = "http://www3.interscience.wiley.com/cgi-bin/abstract/76500355/START; http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=76500355&PLACEBO=IE.pdf", acknowledgement = ack-nhfb, fjournal = "Concurrency, practice and experience", } @InProceedings{Bircsak:2000:EONa, author = "John Bircsak and Peter Craig and RaeLyn Crowell and Zarka Cvetanovic and Jonathan Harris and C. Alexander Nelson and Carl D. 
Offner", title = "Extending {OpenMP} for {NUMA} Machines", crossref = "ACM:2000:SHP", pages = "68--69", year = "2000", bibdate = "Mon Feb 12 12:29:55 2001", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/supercomputing2000.bib", URL = "http://www.sc2000.org/proceedings/techpapr/papers/pap226.pdf", acknowledgement = ack-nhfb, } @Article{Bircsak:2000:EONb, author = "John Bircsak and Peter Craig and RaeLyn Crowell and others", title = "Extending {OpenMP} for {NUMA} machines", journal = j-SCI-PROG, volume = "8", number = "3", pages = "163--181", year = "2000", CODEN = "SCIPEV", ISSN = "1058-9244 (print), 1875-919X (electronic)", ISSN-L = "1058-9244", bibdate = "Thu Mar 28 08:44:35 MST 2002", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sciprogram.bib; OCLC Article1st database", acknowledgement = ack-nhfb, fjournal = "Scientific Programming", journal-URL = "http://iospress.metapress.com/content/1058-9244", } @Article{Bolloni:2000:TIQ, author = "Alessandro Bolloni and Stefano Crocchianti and Antonio Lagan{\`a}", title = "Time Independent {$3$D} Quantum Reactive Scattering on {MIMD} Parallel Computers", journal = j-LECT-NOTES-COMP-SCI, volume = "1908", pages = "338--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:30:27 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080338.htm; http://link.springer-ny.com/link/service/series/0558/papers/1908/19080338.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Bolton:2000:MPL, author = "Hermanus P. J. Bolton and Jaco F. Schutte and Albert A. 
Groenwold", title = "Multiple Parallel Local Searches in Global Optimization", journal = j-LECT-NOTES-COMP-SCI, volume = "1908", pages = "88--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:30:27 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080088.htm; http://link.springer-ny.com/link/service/series/0558/papers/1908/19080088.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @InProceedings{Booth:2000:SSM, author = "S. Booth and E. Mourao", title = "Single-sided {MPI} Implementations for {SUN MPI}", crossref = "ACM:2000:SHP", pages = "46--46", year = "2000", bibdate = "Mon Feb 12 11:57:40 2001", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sc2000.org/proceedings/techpapr/papers/pap182.pdf", acknowledgement = ack-nhfb, } @Article{Bova:2000:DLP, author = "Steve W. Bova and Clay P. Breshears and Christine E. Cuicchi and Zeki Demirbilek and Henry A. 
Gabb", title = "Dual-Level Parallel Analysis of Harbor Wave Response Using {MPI} and {OpenMP}", journal = j-IJHPCA, volume = "14", number = "1", pages = "49--64", month = "Spring", year = "2000", CODEN = "IHPCFL", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Tue Sep 12 12:39:11 2000", bibsource = "https://www.math.utah.edu/pub/tex/bib/ijsa.bib; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, journal-URL = "http://hpc.sagepub.com/content/by/year", } @Article{Brieger:2000:HOO, author = "Leesa Brieger", title = "{HPF} to {OpenMP} on the {Origin2000}: a case study", journal = j-CPE, volume = "12", number = "12", pages = "1147--1154", month = oct, year = "2000", CODEN = "CPEXEI", DOI = "https://doi.org/10.1002/1096-9128(200010)12:12<1147::AID-CPE526>3.0.CO;2-Q", ISSN = "1040-3108", ISSN-L = "1040-3108", bibdate = "Sat Apr 7 06:56:10 MDT 2001", bibsource = "http://www.interscience.wiley.com/jpages/1040-3108; https://www.math.utah.edu/pub/tex/bib/cpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www3.interscience.wiley.com/journalfinder.html", URL = "http://www3.interscience.wiley.com/cgi-bin/abstract/76500351/START; http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=76500351&PLACEBO=IE.pdf", acknowledgement = ack-nhfb, fjournal = "Concurrency, practice and experience", } @Article{Brorsson:2000:SIE, author = "Mats Brorsson and Barbara Chapman", title = "Special Issue: {EWOMP'99 --- First European Workshop on OpenMP}", journal = j-CPE, volume = "12", number = "12", pages = "1117--1119", month = oct, year = "2000", CODEN = "CPEXEI", DOI = "https://doi.org/10.1002/1096-9128(200010)12:12<1117::AID-CPE543>3.0.CO;2-#", ISSN = "1040-3108", ISSN-L = "1040-3108", bibdate = "Sat Apr 7 06:56:10 MDT 2001", bibsource = "http://www.interscience.wiley.com/jpages/1040-3108; https://www.math.utah.edu/pub/tex/bib/cpe.bib; 
https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www3.interscience.wiley.com/journalfinder.html", URL = "http://www3.interscience.wiley.com/cgi-bin/abstract/76500352/START; http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=76500352&PLACEBO=IE.pdf", acknowledgement = ack-nhfb, fjournal = "Concurrency, practice and experience", } @InProceedings{Bruno:2000:PEH, author = "G. Bruno and A. A. Chien and M. J. Katz and P. M. Papadopoulos", title = "Performance Enhancements for {HPVM} in Multi-Network and Heterogeneous Hardware", crossref = "Engquist:2000:SVG", pages = "17--32", year = "2000", bibdate = "Mon Oct 23 10:53:54 2000", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Article{Brunschen:2000:OCP, author = "Christian Brunschen and Mats Brorsson", title = "{OdinMP\slash CCp} --- a portable implementation of {OpenMP} for {C}", journal = j-CPE, volume = "12", number = "12", pages = "1193--1203", month = oct, year = "2000", CODEN = "CPEXEI", DOI = "https://doi.org/10.1002/1096-9128(200010)12:12<1193::AID-CPE527>3.0.CO;2-U", ISSN = "1040-3108", ISSN-L = "1040-3108", bibdate = "Sat Apr 7 06:56:10 MDT 2001", bibsource = "http://www.interscience.wiley.com/jpages/1040-3108; https://www.math.utah.edu/pub/tex/bib/cpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www3.interscience.wiley.com/journalfinder.html", URL = "http://www3.interscience.wiley.com/cgi-bin/abstract/76500347/START; http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=76500347&PLACEBO=IE.pdf", acknowledgement = ack-nhfb, fjournal = "Concurrency, practice and experience", } @Article{Bubak:2000:IOB, author = "Marian Bubak and
W{\l}odzimierz Funika and Bartosz Balis and Roland Wism{\"u}ller", title = "Interoperability of {OCM}-Based On-Line Tools", journal = j-LECT-NOTES-COMP-SCI, volume = "1908", pages = "242--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:30:27 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080242.htm; http://link.springer-ny.com/link/service/series/0558/papers/1908/19080242.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @InProceedings{Bull:2000:JOL, author = "J. M. Bull and M. E. Kambites", editor = "????", booktitle = "{Proceedings of the ACM 2000 conference on Java Grande}", title = "{JOMP}: an {OpenMP}-like interface for {Java}", publisher = pub-ACM, address = pub-ACM:adr, pages = "44--53", year = "2000", bibdate = "Mon Oct 07 09:19:42 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2000.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Bull:2000:PPJ, author = "J. Mark Bull and Mark E.
Kambites and Jan Obdrzalek", title = "Parallel Programming in {Java} with {OpenMP}-like Directives", crossref = "ACM:2000:SHP", pages = "150--150", year = "2000", bibdate = "Sat Feb 10 14:28:55 2001", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/supercomputing2000.bib", acknowledgement = ack-nhfb, } @Article{Butler:2000:SPM, author = "Ralph Butler and William Gropp and Ewing Lusk", title = "A Scalable Process-Management Environment for Parallel Programs", journal = j-LECT-NOTES-COMP-SCI, volume = "1908", pages = "168--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:30:27 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080168.htm; http://link.springer-ny.com/link/service/series/0558/papers/1908/19080168.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @InCollection{Cahir:2000:PMM, author = "Margaret Cahir and Robert Moench and Alice E. 
Koniges", title = "Programming Models and Methods", crossref = "Koniges:2000:ISP", chapter = "3", pages = "27--54", year = "2000", bibdate = "Fri Feb 04 18:32:51 2000", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "Discusses PVM, MPI, SHMEM, High-Performance Fortran, and POSIX threads.", acknowledgement = ack-nhfb, } @InProceedings{Cappello:2000:MVM, author = "Franck Cappello and Daniel Etiemble", title = "{MPI} versus {MPI+OpenMP} on the {IBM SP} for the {NAS Benchmarks}", crossref = "ACM:2000:SHP", pages = "51--51", year = "2000", bibdate = "Mon Feb 12 11:57:42 2001", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/supercomputing2000.bib", URL = "http://www.sc2000.org/proceedings/techpapr/papers/pap214.pdf", acknowledgement = ack-nhfb, } @Article{Carpenter:2000:MML, author = "Bryan Carpenter and Vladimir Getov and Glenn Judd and Anthony Skjellum and Geoffrey Fox", title = "{MPJ}: {MPI}-like message passing for {Java}", journal = j-CPE, volume = "12", number = "11", pages = "1019--1038", month = sep, year = "2000", CODEN = "CPEXEI", DOI = "https://doi.org/10.1002/1096-9128(200009)12:11<1019::AID-CPE518>3.0.CO;2-G", ISSN = "1040-3108", ISSN-L = "1040-3108", bibdate = "Sat Apr 7 06:56:10 MDT 2001", bibsource = "http://www.interscience.wiley.com/jpages/1040-3108; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www3.interscience.wiley.com/journalfinder.html", URL = "http://www3.interscience.wiley.com/cgi-bin/abstract/76000188/START; http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=76000188&PLACEBO=IE.pdf", acknowledgement = ack-nhfb, fjournal = "Concurrency, practice and experience", } @Article{Carpenter:2000:OSM, author = "Bryan Carpenter and Geoffrey Fox and Sung Hoon Ko and Sang Lim", title = "Object serialization for marshaling data in a {Java} interface to {MPI}", journal = j-CPE, volume = "12", number = "7", pages = "539--553", month = may, year = "2000", CODEN = "CPEXEI", DOI = 
"https://doi.org/10.1002/1096-9128(200005)12:7<539::AID-CPE498>3.0.CO;2-H", ISSN = "1040-3108", ISSN-L = "1040-3108", bibdate = "Sun Oct 29 16:57:07 MST 2000", bibsource = "http://www.interscience.wiley.com/jpages/1040-3108; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www3.interscience.wiley.com/journalfinder.html", URL = "http://www3.interscience.wiley.com/cgi-bin/abstract/72516217/START; http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=72516217&PLACEBO=IE.pdf", acknowledgement = ack-nhfb, fjournal = "Concurrency, practice and experience", } @InProceedings{Cartwright:2000:AOE, author = "Keith L. Cartwright and Joseph D. Blahovec", title = "Adding {OpenMP} to an Existing {MPI} Code: Will It be Beneficial?", crossref = "ACM:2000:SHP", pages = "145--145", year = "2000", bibdate = "Sat Feb 10 14:28:55 2001", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Article{Chen:2000:MCO, author = "Hsiang Ann Chen and Yvette O. Carrasco and Amy W. 
Apon", title = "{MPI} Collective Operations over {IP} Multicast", journal = j-LECT-NOTES-COMP-SCI, volume = "1800", pages = "51--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 09:16:18 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1800.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1800/18000051.htm; http://link.springer-ny.com/link/service/series/0558/papers/1800/18000051.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Ciaccio:2000:GMG, author = "Giuseppe Ciaccio and Giovanni Chiola", title = "{GAMMA} and {MPI\slash GAMMA} on Gigabit {Ethernet}", journal = j-LECT-NOTES-COMP-SCI, volume = "1908", pages = "129--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:30:27 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080129.htm; http://link.springer-ny.com/link/service/series/0558/papers/1908/19080129.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Cotronis:2000:CMP, author = "J. Y. Cotronis and Z. Tsiatsoulis and C. 
Kouniakis", title = "Composition of Message Passing Applications On-Demand", journal = j-LECT-NOTES-COMP-SCI, volume = "1908", pages = "192--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:30:27 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080192.htm; http://link.springer-ny.com/link/service/series/0558/papers/1908/19080192.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Couturier:2000:PMD, author = "Rapha{\"e}l Couturier and Christophe Chipot", title = "Parallel molecular dynamics using {OpenMP} on a shared memory machine", journal = j-COMP-PHYS-COMM, volume = "124", number = "1", pages = "49--59", day = "15", month = jan, year = "2000", CODEN = "CPHCBZ", DOI = "https://doi.org/10.1016/S0010-4655(99)00432-4", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Mon Feb 13 23:40:32 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2000.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465599004324", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Domokos:2000:PRC, author = "G{\'a}bor Domokos and Imre Szeber{\'e}nyi and Paul H. 
Steen", title = "Parallel, Recursive Computation of Global Stability Charts for Liquid Bridges", journal = j-LECT-NOTES-COMP-SCI, volume = "1908", pages = "64--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:30:27 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080064.htm; http://link.springer-ny.com/link/service/series/0558/papers/1908/19080064.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Dozsa:2000:THL, author = "G{\'a}bor D{\'o}zsa and D{\'a}niel Dr{\'o}tos and R{\'o}bert Lovas", title = "Translation of a High-Level Graphical Code to Message-Passing Primitives in the {GRADE} Programming Environment", journal = j-LECT-NOTES-COMP-SCI, volume = "1908", pages = "258--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:30:27 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080258.htm; http://link.springer-ny.com/link/service/series/0558/papers/1908/19080258.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @InProceedings{Eigenmann:2000:TMPa, author = "Rudolf Eigenmann and Tim Mattson", title = "Tutorial {M6A}: Parallel Programming with {OpenMP}: {Part I}", crossref = "ACM:2000:SHP", pages = "21--21", year = "2000", bibdate = "Sat Feb 10 14:28:55 2001", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/supercomputing2000.bib", acknowledgement = ack-nhfb, } @InProceedings{Eigenmann:2000:TMPb, author = "Rudolf Eigenmann and Tim Mattson", title = 
"Tutorial {M6B}: Parallel Programming with {OpenMP}: {Part II}", crossref = "ACM:2000:SHP", pages = "23--23", year = "2000", bibdate = "Sat Feb 10 14:28:55 2001", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/supercomputing2000.bib", acknowledgement = ack-nhfb, } @Article{Espinosa:2000:APA, author = "Antonio Espinosa and Tomas Margalef and Emilio Luque", title = "Automatic Performance Analysis of Master\slash Worker {PVM} Applications with {Kpi}", journal = j-LECT-NOTES-COMP-SCI, volume = "1908", pages = "47--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:30:27 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080047.htm; http://link.springer-ny.com/link/service/series/0558/papers/1908/19080047.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Fagg:2000:AAC, author = "Graham E. Fagg and Sathish S. Vadhiyar and Jack J. Dongarra", title = "{ACCT}: {Automatic Collective Communications Tuning}", journal = j-LECT-NOTES-COMP-SCI, volume = "1908", pages = "354--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:30:27 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080354.htm; http://link.springer-ny.com/link/service/series/0558/papers/1908/19080354.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Fagg:2000:FMF, author = "Graham E. Fagg and Jack J. 
Dongarra", title = "{FT-MPI}: {Fault Tolerant MPI}, Supporting Dynamic Applications in a Dynamic World", journal = j-LECT-NOTES-COMP-SCI, volume = "1908", pages = "346--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:30:27 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080346.htm; http://link.springer-ny.com/link/service/series/0558/papers/1908/19080346.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Fahringer:2000:FOP, author = "Thomas Fahringer and Michael Gerndt and Graham Riley and Jesper Larsson Tr{\"a}ff", title = "Formalizing {OpenMP} Performance Properties with {ASL}", journal = j-LECT-NOTES-COMP-SCI, volume = "1940", pages = "428--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 09:17:15 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1940.htm; https://www.math.utah.edu/pub/tex/bib/lncs2000.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1940/19400428.htm; http://link.springer-ny.com/link/service/series/0558/papers/1940/19400428.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Fernandez:2000:DCE, author = "Francisco Fern{\'a}ndez and Marco Tomassini and Leonardo Vanneschi and Laurent Bucher", title = "A Distributed Computing Environment for Genetic Programming Using {MPI}", journal = j-LECT-NOTES-COMP-SCI, volume = "1908", pages = "322--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:30:27 MST 2002", bibsource = 
"http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080322.htm; http://link.springer-ny.com/link/service/series/0558/papers/1908/19080322.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Fernandez:2000:UPM, author = "Gustavo J. Fern{\'a}ndez and Julio Jacobo-Berlles and Patricia Borensztejn and Marisa Bauz{\'a} and Marta Mejail", title = "Use of {PVM} for {MAP} Image Restoration: a Parallel Implementation of the {ARTUR} Algorithm", journal = j-LECT-NOTES-COMP-SCI, volume = "1908", pages = "113--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:30:27 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080113.htm; http://link.springer-ny.com/link/service/series/0558/papers/1908/19080113.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Fink:2000:IMC, author = "Torsten Fink", title = "Integrating {MPI} Components into Metacomputing Applications", journal = j-LECT-NOTES-COMP-SCI, volume = "1908", pages = "208--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:30:27 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080208.htm; http://link.springer-ny.com/link/service/series/0558/papers/1908/19080208.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Geist:2000:PMW, author = "Al Geist", title = "{PVM} and {MPI}: What Else 
Is Needed for Cluster Computing?", journal = j-LECT-NOTES-COMP-SCI, volume = "1908", pages = "1--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:30:27 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080001.htm; http://link.springer-ny.com/link/service/series/0558/papers/1908/19080001.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Girona:2000:VDC, author = "Sergi Girona and Jes{\'u}s Labarta and Rosa M. Badia", title = "Validation of Dimemas Communication Model for {MPI} Collective Operations", journal = j-LECT-NOTES-COMP-SCI, volume = "1908", pages = "39--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:30:27 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080039.htm; http://link.springer-ny.com/link/service/series/0558/papers/1908/19080039.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @InProceedings{Gonzalez:2000:AIT, author = "M. Gonzalez and A. Serra and X. Martorell and J. Oliver and E. Ayguade and J. Labarta and N. Navarro", editor = "????", booktitle = "{Proceedings 14th International Parallel and Distributed Processing Symposium. 
IPDPS 2000}", title = "Applying interposition techniques for performance analysis of {OpenMP} parallel applications", publisher = pub-IEEE, address = pub-IEEE:adr, pages = "235--240", year = "2000", bibdate = "Mon Oct 07 09:07:07 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Article{Gonzalez:2000:NSF, author = "Marc Gonz{\`a}lez and Eduard Ayguad{\'e} and Xavier Martorell and Jes{\'u}s Labarta and Nacho Navarro and Jos{\'e} Oliver", title = "{NanosCompiler}: supporting flexible multilevel parallelism exploitation in {OpenMP}", journal = j-CPE, volume = "12", number = "12", pages = "1205--1218", month = oct, year = "2000", CODEN = "CPEXEI", DOI = "https://doi.org/10.1002/1096-9128(200010)12:12<1205::AID-CPE524>3.0.CO;2-2", ISSN = "1040-3108", ISSN-L = "1040-3108", bibdate = "Sat Apr 7 06:56:10 MDT 2001", bibsource = "http://www.interscience.wiley.com/jpages/1040-3108; https://www.math.utah.edu/pub/tex/bib/cpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www3.interscience.wiley.com/journalfinder.html", URL = "http://www3.interscience.wiley.com/cgi-bin/abstract/76500358/START; http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=76500358&PLACEBO=IE.pdf", acknowledgement = ack-nhfb, fjournal = "Concurrency, practice and experience", } @Article{Gonzalez:2000:PAM, author = "Daniel Gonz{\'a}lez and Francisco Almeida and Luz Marina Moreno and Casiano Rodr{\'\i}guez", title = "Pipeline Algorithms on {MPI}: Optimal Mapping of the Path Planing Problem", journal = j-LECT-NOTES-COMP-SCI, volume = "1908", pages = "104--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:30:27 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080104.htm; 
http://link.springer-ny.com/link/service/series/0558/papers/1908/19080104.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Gonzalez:2000:TSN, author = "J. A. Gonz{\'a}lez and C. Le{\'o}n and F. Piccoli and M. Printista and J. L. Roda and C. Rodr{\'\i}guez and F. Sande", title = "Towards Standard Nested Parallelism", journal = j-LECT-NOTES-COMP-SCI, volume = "1908", pages = "96--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:30:27 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080096.htm; http://link.springer-ny.com/link/service/series/0558/papers/1908/19080096.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Gropp:2000:RCD, author = "William D. Gropp", title = "Runtime Checking of Datatype Signatures in {MPI}", journal = j-LECT-NOTES-COMP-SCI, volume = "1908", pages = "160--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:30:27 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080160.htm; http://link.springer-ny.com/link/service/series/0558/papers/1908/19080160.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @InProceedings{Gropp:2000:TSU, author = "William Gropp and Ewing (Rusty) Lusk and Rajeev S. 
Thakur", title = "Tutorial {S1}: Using {MPI-2}: a Tutorial on Advanced Features of the Message-Passing Interface", crossref = "ACM:2000:SHP", pages = "11--11", year = "2000", bibdate = "Sat Feb 10 14:28:55 2001", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Book{Gropp:2000:UMA, author = "William Gropp and Ewing Lusk and Rajeev Thakur", title = "Using {MPI-2}: Advanced Features of the {Message Passing Interface}", publisher = pub-MIT, address = pub-MIT:adr, pages = "xxi + 382", year = "2000", ISBN = "0-262-57133-1", ISBN-13 = "978-0-262-57133-3", LCCN = "QA76.642 .G762 1999", bibdate = "Wed Aug 27 06:19:05 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = "Scientific and engineering computation", acknowledgement = ack-nhfb, } @InProceedings{He:2000:PAA, author = "Yun (Helen) He and Chris H. Q. Ding", title = "Platforms: An Accurate Arithmetics Approach", crossref = "ACM:2000:SHP", pages = "150--150", year = "2000", bibdate = "Sat Feb 10 14:28:55 2001", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/supercomputing2000.bib", abstract = "Numerical reproducibility of large-scale scientific simulations, especially climate modeling, on distributed memory parallel computers are becoming critical issues. In particular, global summation and dot products of distributed arrays are very susceptible to rounding errors. We analyzed several accurate summation methods and found that two methods are particularly effective to improve (ensure) reproducibility: Kahan's self-compensated summation and Bailey's double-double precision summation. We provide an MPI operator MPI\_SUMDD to work with MPI collective operations to ensure a scalable implementation on large number of processors. 
The final methods are particularly simple to adopt in practical codes.", acknowledgement = ack-nhfb, keywords = "floating-point arithmetic; rounding errors", } @InProceedings{He:2000:UAA, author = "Yun He and Chris H. Q. Ding", title = "Using accurate arithmetics to improve numerical reproducibility and stability in parallel applications", crossref = "Reynders:2000:IPI", pages = "225--234", year = "2000", DOI = "https://doi.org/10.1145/335231.335253", bibdate = "Sat Feb 8 18:35:50 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://dl.acm.org/doi/abs/10.1145/335231.335253", abstract = "Numerical reproducibility and stability of large scale scientific simulations, especially climate modeling, on distributed memory parallel computers are becoming critical issues. In particular, global summation of distributed arrays is most susceptible to rounding errors, and their propagation and accumulation cause uncertainty in final simulation results. We analyzed several accurate summation methods and found that two methods are particularly effective to improve (ensure) reproducibility and stability: Kahan's self-compensated summation and Bailey's double-double precision summation. We provide an MPI operator MPI\_SUMDD to work with MPI collective operations to ensure a scalable implementation on large number of processors.
The final methods are particularly simple to adopt in practical codes.", acknowledgement = ack-nhfb, } @Article{Hisley:2000:PPE, author = "Dixie Hisley and Gagan Agrawal and Punyam Satya-narayana and Lori Pollock", title = "Porting and performance evaluation of irregular codes using {OpenMP}", journal = j-CPE, volume = "12", number = "12", pages = "1241--1259", month = oct, year = "2000", CODEN = "CPEXEI", DOI = "https://doi.org/10.1002/1096-9128(200010)12:12<1241::AID-CPE523>3.0.CO;2-D", ISSN = "1040-3108", ISSN-L = "1040-3108", bibdate = "Sat Apr 7 06:56:10 MDT 2001", bibsource = "http://www.interscience.wiley.com/jpages/1040-3108; https://www.math.utah.edu/pub/tex/bib/cpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www3.interscience.wiley.com/journalfinder.html", URL = "http://www3.interscience.wiley.com/cgi-bin/abstract/76500349/START; http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=76500349&PLACEBO=IE.pdf", acknowledgement = ack-nhfb, fjournal = "Concurrency, practice and experience", } @Article{Hu:2000:ONS, author = "Y. Charlie Hu and Honghui Lu and Alan L. 
Cox and Willy Zwaenepoel", title = "{OpenMP} for Networks of {SMPs}", journal = j-J-PAR-DIST-COMP, volume = "60", number = "12", pages = "1512--1530", day = "1", month = dec, year = "2000", CODEN = "JPDCER", DOI = "https://doi.org/10.1006/jpdc.2000.1658", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Tue Jul 17 08:06:43 MDT 2001", bibsource = "http://www.idealibrary.com/servlet/useragent?func=showAllIssues&curIssueID=jpdc; https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.idealibrary.com/links/doi/10.1006/jpdc.2000.1658; http://www.idealibrary.com/links/doi/10.1006/jpdc.2000.1658/pdf; http://www.idealibrary.com/links/doi/10.1006/jpdc.2000.1658/ref", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Article{Huse:2000:MOS, author = "Lars Paul Huse", title = "{MPI} Optimization for {SMP} Based Clusters Interconnected with {SCI}", journal = j-LECT-NOTES-COMP-SCI, volume = "1908", pages = "56--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:30:27 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080056.htm; http://link.springer-ny.com/link/service/series/0558/papers/1908/19080056.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Imamura:2000:ASM, author = "Toshiyuki Imamura and Yuichi Tsujita and Hiroshi Koide and Hiroshi Takemiya", title = "An Architecture of {Stampi}: {MPI} Library on a Cluster of Parallel Computers", journal = j-LECT-NOTES-COMP-SCI, volume = "1908", pages = "200--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 
(electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Fri Feb 1 08:30:27 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080200.htm;
                 http://link.springer-ny.com/link/service/series/0558/papers/1908/19080200.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}

@Article{Ishizaka:2000:CGT,
  author =       "Kazuhisa Ishizaka and Motoki Obata and Hironori
                 Kasahara",
  title =        "Coarse-Grain Task Parallel Processing Using the
                 {OpenMP} Backend of the {OSCAR} Multigrain
                 Parallelizing Compiler",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1940",
  pages =        "457--??",
  year =         "2000",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Fri Feb 1 09:17:15 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t1940.htm;
                 https://www.math.utah.edu/pub/tex/bib/lncs2000.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer-ny.com/link/service/series/0558/bibs/1940/19400457.htm;
                 http://link.springer-ny.com/link/service/series/0558/papers/1940/19400457.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}

@Article{Iskra:2000:IDE,
  author =       "K. A. Iskra and F. van der Linden and Z. W. Hendrikse
                 and B. J. Overeinder and G. D. van Albada and P. M. A.
                 Sloot",
  title =        "The implementation of {Dynamite}: an environment for
                 migrating {PVM} tasks",
  journal =      j-OPER-SYS-REV,
  volume =       "34",
  number =       "3",
  pages =        "40--55",
  month =        jul,
  year =         "2000",
  CODEN =        "OSRED8",
  ISSN =         "0163-5980 (print), 1943-586X (electronic)",
  ISSN-L =       "0163-5980",
  bibdate =      "Sat Aug 26 08:55:47 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Operating Systems Review",
}

@Article{Iskra:2000:PMD,
  author =       "K. A. Iskra and Z. W. Hendrikse and G. D.
van Albada and B. J. Overeinder and P. M. A. Sloot", title = "Performance Measurements on {Dynamite\slash DPVM}", journal = j-LECT-NOTES-COMP-SCI, volume = "1908", pages = "27--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:30:27 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080027.htm; http://link.springer-ny.com/link/service/series/0558/papers/1908/19080027.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Jin:2000:AGO, author = "Haoqiang Jin and Michael Frumkin and Jerry Yan", title = "Automatic Generation of {OpenMP} Directives and Its Application to Computational Fluid Dynamics Codes", journal = j-LECT-NOTES-COMP-SCI, volume = "1940", pages = "440--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 09:17:15 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1940.htm; https://www.math.utah.edu/pub/tex/bib/lncs2000.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1940/19400440.htm; http://link.springer-ny.com/link/service/series/0558/papers/1940/19400440.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Kuhn:2000:OVT, author = "Bob Kuhn and Paul Petersen and Eamonn O'Toole", title = "{OpenMP} versus threading in {C\slash C++}", journal = j-CPE, volume = "12", number = "12", pages = "1165--1176", month = oct, year = "2000", CODEN = "CPEXEI", DOI = "https://doi.org/10.1002/1096-9128(200010)12:12<1165::AID-CPE529>3.0.CO;2-L", ISSN = "1040-3108", ISSN-L = "1040-3108", bibdate = "Sat Apr 7 06:56:10 MDT 2001", bibsource = 
"http://www.interscience.wiley.com/jpages/1040-3108; https://www.math.utah.edu/pub/tex/bib/cpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www3.interscience.wiley.com/journalfinder.html", URL = "http://www3.interscience.wiley.com/cgi-bin/abstract/76500354/START; http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=76500354&PLACEBO=IE.pdf", acknowledgement = ack-nhfb, fjournal = "Concurrency, practice and experience", } @Article{Kusano:2000:PEO, author = "Kazuhiro Kusano and Shigehisa Satoh and Mitsuhisa Sato", title = "Performance Evaluation of the Omni {OpenMP} Compiler", journal = j-LECT-NOTES-COMP-SCI, volume = "1940", pages = "403--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 09:17:15 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1940.htm; https://www.math.utah.edu/pub/tex/bib/lncs2000.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1940/19400403.htm; http://link.springer-ny.com/link/service/series/0558/papers/1940/19400403.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Landman:2000:PLR, author = "Joseph Landman and Piotr Piecuch", title = "Parallelization of a legacy research program using {OpenMP}", journal = j-FORTRAN-FORUM, volume = "19", number = "2", pages = "16--23", month = aug, year = "2000", CODEN = "????", ISSN = "1061-7264 (print), 1931-1311 (electronic)", ISSN-L = "1061-7264", bibdate = "Wed Feb 6 18:50:08 MST 2002", bibsource = "https://www.math.utah.edu/pub/tex/bib/fortran-forum.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "ACM Fortran Forum", } @Article{Laohawee:2000:PDT, author = "P. Laohawee and A. Tangpong and A. 
Rungsawang", title = "Parallel {DSIR} Text Indexing System: Using Multiple Master\slash Slave Concept", journal = j-LECT-NOTES-COMP-SCI, volume = "1908", pages = "297--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:30:27 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080297.htm; http://link.springer-ny.com/link/service/series/0558/papers/1908/19080297.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Lassous:2000:HGA, author = "Isabelle Gu{\'e}rin Lassous and Jens Gustedt and Michel Morvan", title = "Handling Graphs According to a Coarse Grained Approach: Experiments with {PVM} and {MPI}", journal = j-LECT-NOTES-COMP-SCI, volume = "1908", pages = "72--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:30:27 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080072.htm; http://link.springer-ny.com/link/service/series/0558/papers/1908/19080072.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Livny:2000:MYW, author = "Miron Livny", title = "Managing Your Workforce on a Computational Grid", journal = j-LECT-NOTES-COMP-SCI, volume = "1908", pages = "3--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:30:27 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = 
"http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080003.htm; http://link.springer-ny.com/link/service/series/0558/papers/1908/19080003.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Louca:2000:MFP, author = "S. Louca and N. Neophytou and A. Lachanas and P. Evripidou", title = "{MPI-FT}: Portable Fault Tolerance Scheme for {MPI}", journal = j-PARALLEL-PROCESS-LETT, volume = "10", number = "4", pages = "371--??", month = dec, year = "2000", CODEN = "PPLTEE", ISSN = "0129-6264 (print), 1793-642X (electronic)", bibdate = "Wed Jul 25 16:34:42 2001", bibsource = "http://ejournals.wspc.com.sg/ppl/ppl.shtml; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://ejournals.wspc.com.sg/ppl/10/1004/S0129626400000342.html", acknowledgement = ack-nhfb, fjournal = "Parallel Processing Letters", journal-URL = "http://www.worldscientific.com/loi/ppl", } @Article{Lusk:2000:IIC, author = "Ewing Lusk", title = "Isolating and Interfacing the Components of a Parallel Computing Environment", journal = j-LECT-NOTES-COMP-SCI, volume = "1908", pages = "5--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:30:27 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080005.htm; http://link.springer-ny.com/link/service/series/0558/papers/1908/19080005.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @InProceedings{Mattson:2000:BOF, author = "Tim Mattson", title = "{BOF}: {OpenMP} and its Future Developments", crossref = "ACM:2000:SHP", pages = "106--106", year = "2000", bibdate = "Sat Feb 10 14:28:55 2001", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/supercomputing2000.bib", acknowledgement = 
ack-nhfb,
}

@Article{Mattson:2000:IO,
  author =       "Timothy G. Mattson",
  title =        "An Introduction to {OpenMP 2.0}",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1940",
  pages =        "384--??",
  year =         "2000",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Fri Feb 1 09:17:15 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t1940.htm;
                 https://www.math.utah.edu/pub/tex/bib/lncs2000.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer-ny.com/link/service/series/0558/bibs/1940/19400384.htm;
                 http://link.springer-ny.com/link/service/series/0558/papers/1940/19400384.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}

@Article{Mazzocca:2000:TPP,
  author =       "N. Mazzocca and M. Rak and U. Villano",
  title =        "The Transition from a {PVM} Program Simulator to a
                 Heterogeneous System Simulator: The {HeSSE} Project",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1908",
  pages =        "266--??",
  year =         "2000",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Fri Feb 1 08:30:27 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080266.htm;
                 http://link.springer-ny.com/link/service/series/0558/papers/1908/19080266.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}

@Article{McDonald:2000:TPA,
  author =       "Chris McDonald and Kamran Kazemi",
  title =        "Teaching parallel algorithms with process topologies",
  journal =      j-SIGCSE,
  volume =       "32",
  number =       "1",
  pages =        "70--74",
  month =        mar,
  year =         "2000",
  CODEN =        "SIGSD3",
  DOI =          "https://doi.org/10.1145/331795.331816",
  ISSN =         "0097-8418 (print), 2331-3927 (electronic)",
  ISSN-L =       "0097-8418",
  bibdate =      "Mon Nov 19 10:05:03 MST 2012",
  bibsource =
"https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigcse2000.bib", abstract = "Parallel algorithms are often introduced to students by describing the geometric topologies formed by communicating processes and often the geographic relationships between them. However, the two most common message passing environments used in teaching, PVM and MPI, each provide only rudimentary support for the specification and execution of process topologies. There is a strong need for better syntactic and semantic support for process topologies in these environments, so that students may concentrate on the algorithms being studied, and not have to wrestle with the environments' infrastructure. This paper first motivates, and then describes the use of additional support within PVM and MPI which addresses this need.", acknowledgement = ack-nhfb, fjournal = "SIGCSE Bulletin (ACM Special Interest Group on Computer Science Education)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J688", } @Article{Mierendorff:2000:WMB, author = "Hermann Mierendorff and Kl{\"a}re Cassirer and Helmut Schwamborn", title = "Working with {MPI} Benchmarking Suites on {ccNUMA} Architectures", journal = j-LECT-NOTES-COMP-SCI, volume = "1908", pages = "18--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:30:27 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080018.htm; http://link.springer-ny.com/link/service/series/0558/papers/1908/19080018.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Migliardi:2000:SFT, author = "Mauro Migliardi and Vaidy Sunderam and Arrigo Frisiani", title = "A Simple, Fault Tolerant Naming Space for the {HARNESS} Metacomputing System", journal = 
j-LECT-NOTES-COMP-SCI, volume = "1908", pages = "152--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:30:27 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080152.htm; http://link.springer-ny.com/link/service/series/0558/papers/1908/19080152.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Mourao:2000:SSC, author = "Elson Mour{\~a}o and Stephen Booth", title = "Single Sided Communications in Multi-protocol {MPI}", journal = j-LECT-NOTES-COMP-SCI, volume = "1908", pages = "176--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:30:27 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080176.htm; http://link.springer-ny.com/link/service/series/0558/papers/1908/19080176.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Neyman:2000:CDA, author = "Marcin Neyman", title = "Comparison of Different Approaches to Trace {PVM} Program Execution", journal = j-LECT-NOTES-COMP-SCI, volume = "1908", pages = "274--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:30:27 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080274.htm; http://link.springer-ny.com/link/service/series/0558/papers/1908/19080274.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes 
in Computer Science",
}

@InProceedings{Nikolopoulos:2000:DDN,
  author =       "Dimitrios S. Nikolopoulos and Theodore S.
                 Papatheodorou and Constantine D. Polychronopoulos and
                 Jes{\'u}s Labarta and Eduard Ayguad{\'e}",
  title =        "Is Data Distribution Necessary in {OpenMP}?",
  crossref =     "ACM:2000:SHP",
  pages =        "68--68",
  year =         "2000",
  bibdate =      "Mon Feb 12 11:57:45 2001",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sc2000.org/proceedings/techpapr/papers/pap192.pdf",
  acknowledgement = ack-nhfb,
}

@Article{Nikolopoulos:2000:LTD,
  author =       "Dimitrios S. Nikolopoulos and Theodore S.
                 Papatheodorou and Constantine D. Polychronopoulos and
                 Jes{\'u}s Labarta and Eduard Ayguad{\'e}",
  title =        "Leveraging Transparent Data Distribution in {OpenMP}
                 via User-Level Dynamic Page Migration",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1940",
  pages =        "415--??",
  year =         "2000",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Fri Feb 1 09:17:15 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t1940.htm;
                 https://www.math.utah.edu/pub/tex/bib/lncs2000.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer-ny.com/link/service/series/0558/bibs/1940/19400415.htm;
                 http://link.springer-ny.com/link/service/series/0558/papers/1940/19400415.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}

@Article{Nikolopoulos:2000:TRD,
  author =       "Dimitrios S. Nikolopoulos and Theodore S.
                 Papatheodorou and Constantine D.
Polychronopoulos and others", title = "A transparent runtime data distribution engine for {OpenMP}", journal = j-SCI-PROG, volume = "8", number = "3", pages = "143--162", year = "2000", CODEN = "SCIPEV", ISSN = "1058-9244 (print), 1875-919X (electronic)", ISSN-L = "1058-9244", bibdate = "Thu Mar 28 08:44:35 MST 2002", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sciprogram.bib; OCLC Article1st database", acknowledgement = ack-nhfb, fjournal = "Scientific Programming", journal-URL = "http://iospress.metapress.com/content/1058-9244", } @Article{Nikolopoulos:2000:ULR, author = "Dimitrios S. Nikolopoulos and Theodore S. Papatheodorou and Constantine D. Polychronopoulos and Jes{\'u}s Labarta and Eduard Ayguad{\'e}", title = "{UPM LIB}: a Runtime System for Tuning the Memory Performance of {OpenMP} Programs on Scalable Shared-Memory Multiprocessors", journal = j-LECT-NOTES-COMP-SCI, volume = "1915", pages = "85--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Sep 10 19:08:51 MDT 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1915.htm; https://www.math.utah.edu/pub/tex/bib/lncs2000.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1915/19150085.htm; http://link.springer-ny.com/link/service/series/0558/papers/1915/19150085.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Nishitani:2000:IEO, author = "Yasunori Nishitani and Kiyoshi Negishi and Hiroshi Ohta and Eiji Nunohiro", title = "Implementation and Evaluation of {OpenMP} for {Hitachi SR8000}", journal = j-LECT-NOTES-COMP-SCI, volume = "1940", pages = "391--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 09:17:15 MST 2002", bibsource = 
"http://link.springer-ny.com/link/service/series/0558/tocs/t1940.htm; https://www.math.utah.edu/pub/tex/bib/lncs2000.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1940/19400391.htm; http://link.springer-ny.com/link/service/series/0558/papers/1940/19400391.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Nitsche:2000:TCM, author = "Thomas Nitsche", title = "Thread Communication over {MPI}", journal = j-LECT-NOTES-COMP-SCI, volume = "1908", pages = "145--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:30:27 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080145.htm; http://link.springer-ny.com/link/service/series/0558/papers/1908/19080145.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @InProceedings{Ong:2000:PCL, author = "Hong Ong and Paul A. Farrell", title = "Performance Comparison of {LAM\slash MPI}, {MPICH}, and {MVICH} on a {Linux} Cluster Connected by a {Gigabit Ethernet} Network", crossref = "USENIX:2000:PAL", pages = "??--??", year = "2000", bibdate = "Wed Oct 16 05:17:16 2002", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.usenix.org/publications/library/proceedings/als2000/ong.html", acknowledgement = ack-nhfb, } @Article{Orlando:2000:MDT, author = "S. Orlando and P. Palmerini and R. 
Perego",
  title =        "Mixed data and task parallelism with {HPF} and {PVM}",
  journal =      "Cluster Computing",
  volume =       "3",
  number =       "3",
  publisher =    "Kluwer Academic Publishers, Boston, U.S.A",
  pages =        "201--213",
  year =         "2000",
  CODEN =        "????",
  ISSN =         "1386-7857 (print), 1573-7543 (electronic)",
  ISSN-L =       "1386-7857",
  bibdate =      "Sat Dec 7 09:42:43 MST 2002",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 Ingenta database",
  acknowledgement = ack-nhfb,
  fjournal =     "Cluster Computing",
  pagecount =    "13",
}

@Article{Payrits:2000:UPC,
  author =       "Szabolcs Payrits and Zolt{\'a}n Szatm{\'a}ry and
                 L{\'a}szl{\'o} Zal{\'a}nyi and P{\'e}ter {\'E}rdi",
  title =        "Use of Parallel Computers in Neurocomputing",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1908",
  pages =        "313--??",
  year =         "2000",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Fri Feb 1 08:30:27 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080313.htm;
                 http://link.springer-ny.com/link/service/series/0558/papers/1908/19080313.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}

@Article{Pedroso:2000:MPC,
  author =       "Hern{\^a}ni Pedroso and Jo{\~a}o Gabriel Silva",
  title =        "{MPI-2} Process Creation \& Management Implementation
                 for {NT} Clusters",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1908",
  pages =        "184--??",
  year =         "2000",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Fri Feb 1 08:30:27 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080184.htm;
                 http://link.springer-ny.com/link/service/series/0558/papers/1908/19080184.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}

@TechReport{Petcu:2000:PDAa,
author =       "Dana Petcu",
  title =        "{PVMaple}: a Distributed Approach to Cooperative Work
                 of {Maple} Processes",
  type =         "Technical report",
  institution =  "Western University of Timisoara",
  address =      "Timisoara, Romania",
  month =        may,
  year =         "2000",
  bibdate =      "Wed Dec 17 18:08:30 2003",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.risc.uni-linz.ac.at/software/distmaple/index_1.html",
  URL =          "http://www.risc.uni-linz.ac.at/software/distmaple/misc/PVMaple.ps.gz",
  acknowledgement = ack-nhfb,
  keywords =     "Distributed Maple; PVMaple",
}

@Article{Petcu:2000:PDAb,
  author =       "Dana Petcu",
  title =        "{PVMaple}: a Distributed Approach to Cooperative Work
                 of {Maple} Processes",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1908",
  pages =        "216--??",
  year =         "2000",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Fri Feb 1 08:30:27 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080216.htm;
                 http://link.springer-ny.com/link/service/series/0558/papers/1908/19080216.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}

@Article{Plazek:2000:SCC,
  author =       "Joanna P{\l}azek and Krzysztof Banas and Jacek
                 Kitowski",
  title =        "Scalable {CFD} Computations Using Message-Passing and
                 Distributed Shared Memory Algorithms",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1908",
  pages =        "282--??",
  year =         "2000",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Fri Feb 1 08:30:27 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080282.htm;
                 http://link.springer-ny.com/link/service/series/0558/papers/1908/19080282.pdf",
acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Protopopov:2000:SMC, author = "Boris V. Protopopov and Anthony Skjellum", title = "Shared-memory communication approaches for an {MPI} message-passing library", journal = j-CPE, volume = "12", number = "9", pages = "799--820", day = "10", month = aug, year = "2000", CODEN = "CPEXEI", DOI = "https://doi.org/10.1002/1096-9128(20000810)12:9<799::AID-CPE476>3.0.CO;2-1", ISSN = "1040-3108", ISSN-L = "1040-3108", bibdate = "Sun Oct 29 16:57:07 MST 2000", bibsource = "http://www.interscience.wiley.com/jpages/1040-3108; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www3.interscience.wiley.com/journalfinder.html", URL = "http://www3.interscience.wiley.com/cgi-bin/abstract/72516482/START; http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=72516482&PLACEBO=IE.pdf", acknowledgement = ack-nhfb, fjournal = "Concurrency, practice and experience", } @Article{Quoy:2000:PNN, author = "Mathias Quoy and Sorin Moga and Philippe Gaussier and Arnaud Revel", title = "Parallelization of Neural Networks Using {PVM}", journal = j-LECT-NOTES-COMP-SCI, volume = "1908", pages = "289--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:30:27 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080289.htm; http://link.springer-ny.com/link/service/series/0558/papers/1908/19080289.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Rabaea:2000:EPM, author = "Adrian Rabaea and Monica Rabaea", title = "Experiments with Parallel {Monte Carlo} Simulation for Pricing Options Using {PVM}", journal = j-LECT-NOTES-COMP-SCI, volume = "1908", pages = "330--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 
(electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Fri Feb 1 08:30:27 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080330.htm;
                 http://link.springer-ny.com/link/service/series/0558/papers/1908/19080330.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}

@Article{Reussner:2000:BMD,
  author =       "Ralf Reussner and Jesper Larsson Tr{\"a}ff and Gunnar
                 Hunzelmann",
  title =        "A Benchmark for {MPI} Derived Datatypes",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1908",
  pages =        "10--??",
  year =         "2000",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Fri Feb 1 08:30:27 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080010.htm;
                 http://link.springer-ny.com/link/service/series/0558/papers/1908/19080010.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}

@MastersThesis{Rohrl:2000:PPS,
  author =       "Armin R{\"o}hrl",
  title =        "Parallel processing in statistical computation: {BSP},
                 {FPGAs} and {MPI} for the {S}-language",
  type =         "Th{\`e}se sciences",
  school =       "EPF Lausanne",
  address =      "Lausanne, Switzerland",
  pages =        "137",
  year =         "2000",
  bibdate =      "Wed Aug 27 07:24:45 2003",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}

@InProceedings{Roy:2000:MGQ,
  author =       "Alain J.
Roy and Ian Foster and William Gropp and Nicholas Karonis and Volker Sander and Brian Toonen", title = "{MPICH-GQ}: Quality-of-Service for Message Passing Programs", crossref = "ACM:2000:SHP", pages = "54--54", year = "2000", bibdate = "Mon Feb 12 11:57:43 2001", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sc2000.org/proceedings/techpapr/papers/pap234.pdf", acknowledgement = ack-nhfb, } @Article{Scherer:2000:APO, author = "Alex Scherer and Thomas Gross and Willy Zwaenepoel", title = "Adaptive Parallelism for {OpenMP} Task Parallel Programs", journal = j-LECT-NOTES-COMP-SCI, volume = "1915", pages = "113--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Sep 10 19:08:51 MDT 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1915.htm; https://www.math.utah.edu/pub/tex/bib/lncs2000.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1915/19150113.htm; http://link.springer-ny.com/link/service/series/0558/papers/1915/19150113.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Shah:2000:FCS, author = "Sanjiv Shah and Grant Haab and Paul Petersen and Joe Throop", title = "Flexible control structures for parallelism in {OpenMP}", journal = j-CPE, volume = "12", number = "12", pages = "1219--1239", month = oct, year = "2000", CODEN = "CPEXEI", DOI = "https://doi.org/10.1002/1096-9128(200010)12:12<1219::AID-CPE530>3.0.CO;2-0", ISSN = "1040-3108", ISSN-L = "1040-3108", bibdate = "Sat Apr 7 06:56:10 MDT 2001", bibsource = "http://www.interscience.wiley.com/jpages/1040-3108; https://www.math.utah.edu/pub/tex/bib/cpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www3.interscience.wiley.com/journalfinder.html", URL = "http://www3.interscience.wiley.com/cgi-bin/abstract/76500348/START; 
http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=76500348&PLACEBO=IE.pdf", acknowledgement = ack-nhfb, fjournal = "Concurrency, practice and experience", } @Article{Shyu:2000:APV, author = "Shyong-Jian Shyu and B. M. T. Lin", title = "An application of parallel virtual machine framework to film production problem", journal = j-COMPUT-MATH-APPL, volume = "39", number = "12", pages = "53--62", month = jun, year = "2000", CODEN = "CMAPDK", ISSN = "0898-1221 (print), 1873-7668 (electronic)", ISSN-L = "0898-1221", bibdate = "Wed Mar 1 21:49:06 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/computmathappl2000.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0898122100001292", acknowledgement = ack-nhfb, fjournal = "Computers and Mathematics with Applications", journal-URL = "http://www.sciencedirect.com/science/journal/08981221", } @Article{Silva:2000:HPC, author = "Lu{\'\i}s Moura Silva and Paulo Martins and Jo{\~a}o Gabriel Silva", title = "Heterogeneous parallel computing using {Java} and {WMPI}", journal = j-CPE, volume = "12", number = "11", pages = "1077--1091", month = sep, year = "2000", CODEN = "CPEXEI", DOI = "https://doi.org/10.1002/1096-9128(200009)12:11<1077::AID-CPE521>3.0.CO;2-#", ISSN = "1040-3108", ISSN-L = "1040-3108", bibdate = "Sat Apr 7 06:56:10 MDT 2001", bibsource = "http://www.interscience.wiley.com/jpages/1040-3108; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www3.interscience.wiley.com/journalfinder.html", URL = "http://www3.interscience.wiley.com/cgi-bin/abstract/76000189/START; http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=76000189&PLACEBO=IE.pdf", acknowledgement = ack-nhfb, fjournal = "Concurrency, practice and experience", } @Article{Smith:2000:DPM, author = "Lorna Smith and Paul Kent", title = "Development and performance of a mixed {OpenMP\slash MPI} quantum {Monte Carlo} code", 
journal = j-CPE, volume = "12", number = "12", pages = "1121--1129", month = oct, year = "2000", CODEN = "CPEXEI", DOI = "https://doi.org/10.1002/1096-9128(200010)12:12<1121::AID-CPE531>3.0.CO;2-N", ISSN = "1040-3108", ISSN-L = "1040-3108", bibdate = "Sat Apr 7 06:56:10 MDT 2001", bibsource = "http://www.interscience.wiley.com/jpages/1040-3108; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www3.interscience.wiley.com/journalfinder.html", URL = "http://www3.interscience.wiley.com/cgi-bin/abstract/76500350/START; http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=76500350&PLACEBO=IE.pdf", acknowledgement = ack-nhfb, fjournal = "Concurrency, practice and experience", } @Article{Solsona:2000:MCM, author = "Francesc Solsona and Francesc Gin{\'e} and Josep L{\'e}rida and Porfidio Hern{\'a}ndez and Emilio Luque", title = "{Monito}: a Communication Monitoring Tool for a {PVM--Linux} Environment", journal = j-LECT-NOTES-COMP-SCI, volume = "1908", pages = "233--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:30:27 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080233.htm; http://link.springer-ny.com/link/service/series/0558/papers/1908/19080233.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Sosa:2000:IQC, author = "C. P. Sosa and G. Scalmani and R. Gomperts and M. J. Frisch", title = "Ab initio quantum chemistry on a {ccNUMA} architecture using {openMP}. 
{III}", journal = j-PARALLEL-COMPUTING, volume = "26", number = "7--8", pages = "843--856", month = jul, year = "2000", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Sat Oct 28 17:44:32 MDT 2000", bibsource = "http://www.elsevier.com/locate/issn/01678191; https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.elsevier.nl/gej-ng/10/35/21/42/29/25/abstract.html; http://www.elsevier.nl/gej-ng/10/35/21/42/29/25/article.pdf", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Sterling:2000:SCB, author = "Thomas Sterling", title = "Symbolic Computing with {Beowulf}-Class {PC} Clusters", journal = j-LECT-NOTES-COMP-SCI, volume = "1908", pages = "7--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:30:27 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080007.htm; http://link.springer-ny.com/link/service/series/0558/papers/1908/19080007.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Suppi:2000:IOP, author = "Remo Suppi and Fernando Cores and Emilio Luque", title = "Improving Optimistic {PDES} in {PVM} Environments", journal = j-LECT-NOTES-COMP-SCI, volume = "1908", pages = "304--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:30:27 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080304.htm; 
http://link.springer-ny.com/link/service/series/0558/papers/1908/19080304.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Tanaka:2000:PEO, author = "Yoshizumi Tanaka and Kenjiro Taura and Mitsuhisa Sato and Akinori Yonezawa", title = "Performance Evaluation of {OpenMP} Applications with Nested Parallelism", journal = j-LECT-NOTES-COMP-SCI, volume = "1915", pages = "100--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Sep 10 19:08:51 MDT 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1915.htm; https://www.math.utah.edu/pub/tex/bib/lncs2000.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1915/19150100.htm; http://link.springer-ny.com/link/service/series/0558/papers/1915/19150100.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Tang:2000:PTR, author = "Hong Tang and Kai Shen and Tao Yang", title = "Program transformation and runtime support for threaded {MPI} execution on shared-memory machines", journal = j-TOPLAS, volume = "22", number = "4", pages = "673--700", year = "2000", CODEN = "ATPSDT", ISSN = "0164-0925 (print), 1558-4593 (electronic)", ISSN-L = "0164-0925", bibdate = "Tue Apr 17 10:05:24 MDT 2001", bibsource = "http://www.acm.org/pubs/toc/; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.acm.org/pubs/citations/journals/toplas/2000-22-4/p673-tang/", abstract = "Parallel programs written in MPI have been widely used for developing high-performance applications on various platforms. Because of a restriction of the MPI computation model, conventional MPI implementations on shared-memory machines map each MPI node to an OS process, which can suffer serious performance degradation in the presence of multiprogramming. 
This paper studies compile-time and runtime techniques for enhancing performance portability of MPI code running on multiprogrammed shared-memory machines.
    The proposed techniques allow MPI nodes to be executed safely and efficiently as threads.
    Compile-time transformation eliminates global and static variables in C code using node-specific data.
    The runtime support includes an efficient and provably correct communication protocol that uses lock-free data structure and takes advantage of address space sharing among threads.
    The experiments on SGI Origin 2000 show that our MPI prototype called TMPI using the proposed techniques is competitive with SGI's native MPI implementation in a dedicated environment, and that it has significant performance advantages in a multiprogrammed environment.",
  acknowledgement = ack-nhfb,
  fjournal = "ACM Transactions on Programming Languages and Systems",
  generalterms = "Algorithms; Design; Experimentation; Languages; Performance",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J783",
  keywords = "lock-free synchronization; MPI; multiprogrammed environments; program transformation; shared-memory machines; threaded execution",
  subject = "Hardware --- Memory Structures --- Design Styles (B.3.2): {\bf Shared memory}; Software --- Programming Techniques --- Concurrent Programming (D.1.3): {\bf Parallel programming}; Software --- Programming Languages --- Language Classifications (D.3.2): {\bf Concurrent, distributed, and parallel languages}; Software --- Programming Languages --- Processors (D.3.4): {\bf Preprocessors}; Software --- Programming Languages --- Processors (D.3.4): {\bf Run-time environments}; Software --- Operating Systems --- Process Management (D.4.1): {\bf Multiprocessing/multiprogramming/multitasking}; Data --- Data Structures (E.1): {\bf Lists, stacks, and queues}",
}

@Article{Tatebe:2000:IOO,
  author = "Osamu Tatebe and Mitsuhisa Sato and Satoshi Sekiguchi",
  title = "Impact of {OpenMP} Optimizations for the {MGCG}
Method", journal = j-LECT-NOTES-COMP-SCI, volume = "1940", pages = "471--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 09:17:15 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1940.htm; https://www.math.utah.edu/pub/tex/bib/lncs2000.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1940/19400471.htm; http://link.springer-ny.com/link/service/series/0558/papers/1940/19400471.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Tavora:2000:DCM, author = "V{\'\i}tor N. T{\'a}vora and Lu{\'\i}s M. Silva and Jo{\~a}o Gabriel Silva", title = "Distributed Checkpointing Mechanism for a Parallel File System", journal = j-LECT-NOTES-COMP-SCI, volume = "1908", pages = "137--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:30:27 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080137.htm; http://link.springer-ny.com/link/service/series/0558/papers/1908/19080137.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Thiruvathukal:2000:JNW, author = "George K. Thiruvathukal and Phillip M. 
Dickens and Shahzad Bhatti",
  title = "{Java} on networks of workstations {(JavaNOW)}: a parallel computing framework inspired by {Linda} and the {Message Passing Interface (MPI)}",
  journal = j-CPE,
  volume = "12",
  number = "11",
  pages = "1093--1116",
  month = sep,
  year = "2000",
  CODEN = "CPEXEI",
  DOI = "https://doi.org/10.1002/1096-9128(200009)12:11<1093::AID-CPE522>3.0.CO;2-6",
  ISSN = "1040-3108",
  ISSN-L = "1040-3108",
  bibdate = "Sat Apr 7 06:56:10 MDT 2001",
  bibsource = "http://www.interscience.wiley.com/jpages/1040-3108; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www3.interscience.wiley.com/journalfinder.html",
  URL = "http://www3.interscience.wiley.com/cgi-bin/abstract/76000187/START; http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=76000187&PLACEBO=IE.pdf",
  acknowledgement = ack-nhfb,
  fjournal = "Concurrency, practice and experience",
}

@Article{Tourancheau:2000:HSN,
  author = "Bernard Tourancheau",
  title = "High Speed Networks for Clusters, the {BIP-Myrinet} Experience",
  journal = j-LECT-NOTES-COMP-SCI,
  volume = "1908",
  pages = "9--??",
  year = "2000",
  CODEN = "LNCSD9",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L = "0302-9743",
  bibdate = "Fri Feb 1 08:30:27 MST 2002",
  bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080009.htm; http://link.springer-ny.com/link/service/series/0558/papers/1908/19080009.pdf",
  acknowledgement = ack-nhfb,
  fjournal = "Lecture Notes in Computer Science",
}

@InProceedings{Traff:2000:IMO,
  author = "Jesper Larsson Tr{\"a}ff and Hubert Ritzdorf and Rolf Hempel",
  title = "The Implementation of {MPI-2} One-Sided Communication for the {NEC SX-5}",
  crossref = "ACM:2000:SHP",
  pages = "45--46",
  year = "2000",
  bibdate = "Mon Feb 12 11:57:32 2001",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =
"http://www.sc2000.org/proceedings/techpapr/papers/pap181.pdf", acknowledgement = ack-nhfb, } @Article{Tran:2000:PPM, author = "Viet D. Tran and Ladislav Hluchy and Giang T. Nguyen", title = "Parallel Program Model for Distributed Systems", journal = j-LECT-NOTES-COMP-SCI, volume = "1908", pages = "250--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:30:27 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080250.htm; http://link.springer-ny.com/link/service/series/0558/papers/1908/19080250.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{VanVoorst:2000:CMI, author = "Brian {Van Voorst} and Steven Seidel", title = "Comparison of {MPI} Implementations on a Shared Memory Machine", journal = j-LECT-NOTES-COMP-SCI, volume = "1800", pages = "847--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 09:16:18 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1800.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1800/18000847.htm; http://link.springer-ny.com/link/service/series/0558/papers/1800/18000847.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Varin:2000:PAL, author = "E. Varin and R. Roy and G. 
Samba", title = "Parallel Algorithms for the Least-Squares Finite Element Solution of the Neutron Transport Equation", journal = j-LECT-NOTES-COMP-SCI, volume = "1908", pages = "121--??", year = "2000", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:30:27 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080121.htm; http://link.springer-ny.com/link/service/series/0558/papers/1908/19080121.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @InProceedings{Vetter:2000:DST, author = "Jeffrey S. Vetter and Bronis R. de Supinski", title = "Dynamic Software Testing of {MPI} Applications with {Umpire}", crossref = "ACM:2000:SHP", pages = "70--70", year = "2000", bibdate = "Mon Feb 12 11:57:45 2001", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sc2000.org/proceedings/techpapr/papers/pap208.pdf", acknowledgement = ack-nhfb, } @TechReport{VidalMacia:2000:IPM, author = "Antonio {Vidal Maci{\'a}} and Jos{\'e} Luis {P{\'e}rez G{\'o}mez}", title = "Introducci{\'o}n a la programaci{\'o}n en {MPI}. ({Spanish}) [{Introduction} to programming in {MPI}]", type = "Technical report", number = "{SPUPV-2000.209}", institution = "Departamento de Sistemas Inform{\'a}ticos y Computaci{\'o}n, Facultad de Inform{\'a}tica, Universidad Polit{\'e}cnica de Valencia, Servicio de Publicaciones", address = "Valencia, Spain", pages = "78", year = "2000", bibdate = "Wed Aug 27 06:35:39 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, language = "Spanish", } @Article{Wallcraft:2000:SOV, author = "Alan J. 
Wallcraft", title = "{SPMD} {OpenMP} versus {MPI} for ocean models", journal = j-CPE, volume = "12", number = "12", pages = "1155--1164", month = oct, year = "2000", CODEN = "CPEXEI", DOI = "https://doi.org/10.1002/1096-9128(200010)12:12<1155::AID-CPE532>3.0.CO;2-5", ISSN = "1040-3108", ISSN-L = "1040-3108", bibdate = "Sat Apr 7 06:56:10 MDT 2001", bibsource = "http://www.interscience.wiley.com/jpages/1040-3108; https://www.math.utah.edu/pub/tex/bib/cpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www3.interscience.wiley.com/journalfinder.html", URL = "http://www3.interscience.wiley.com/cgi-bin/abstract/76500353/START; http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=76500353&PLACEBO=IE.pdf", acknowledgement = ack-nhfb, fjournal = "Concurrency, practice and experience", } @Article{Addison:2001:EOP, author = "Cliff Addison", title = "Exploiting {OpenMP} to Provide Scalable {SMP BLAS} and {LAPACK} Routines", journal = j-LECT-NOTES-COMP-SCI, volume = "2073", pages = "3--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Feb 2 13:04:28 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2073.htm; https://www.math.utah.edu/pub/tex/bib/lncs2001b.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2073/20730003.htm; http://link.springer-ny.com/link/service/series/0558/papers/2073/20730003.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Al-Tawil:2001:PME, author = "Khalid Al-Tawil and Csaba Andras Moritz", title = "Performance Modeling and Evaluation of {MPI}", journal = j-J-PAR-DIST-COMP, volume = "61", number = "2", pages = "202--223", day = "1", month = feb, year = "2001", CODEN = "JPDCER", DOI = "https://doi.org/10.1006/jpdc.2000.1677", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Fri Feb 22 
15:30:36 MST 2002", bibsource = "http://www.idealibrary.com/servlet/useragent?func=showAllIssues&curIssueID=jpdc; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.idealibrary.com/links/doi/10.1006/jpdc.2000.1677; http://www.idealibrary.com/links/doi/10.1006/jpdc.2000.1677/pdf; http://www.idealibrary.com/links/doi/10.1006/jpdc.2000.1677/ref", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Article{AlHaddad:2001:UNW, author = "Mohammed {Al Haddad} and Jerome Robinson", title = "Using a Network of Workstations to Enhance Database Query Processing Performance", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "352--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310352.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310352.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Allsopp:2001:EUM, author = "Nicholas K. Allsopp and John F. 
Hague and Jean-Pierre Prost",
  title = "Experiences in Using {MPI--IO} on Top of {GPFS} for the {IFS} Weather Forecast Code",
  journal = j-LECT-NOTES-COMP-SCI,
  volume = "2150",
  pages = "380--??",
  year = "2001",
  CODEN = "LNCSD9",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L = "0302-9743",
  bibdate = "Sat Feb 2 13:05:53 MST 2002",
  bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2150.htm; https://www.math.utah.edu/pub/tex/bib/lncs2001c.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2150/21500380.htm; http://link.springer-ny.com/link/service/series/0558/papers/2150/21500380.pdf",
  acknowledgement = ack-nhfb,
  fjournal = "Lecture Notes in Computer Science",
}

@Article{Angskun:2001:DPM,
  author = "Thara Angskun and Putchong Uthayopas and Arnon Rungsawang",
  title = "Dynamic Process Management in {KSIX} Cluster Middleware",
  journal = j-LECT-NOTES-COMP-SCI,
  volume = "2131",
  pages = "209--??",
  year = "2001",
  CODEN = "LNCSD9",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L = "0302-9743",
  bibdate = "Fri Feb 1 08:13:55 MST 2002",
  bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310209.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310209.pdf",
  acknowledgement = ack-nhfb,
  fjournal = "Lecture Notes in Computer Science",
}

@Article{Anonymous:2001:AAL,
  author = "Anonymous",
  title = "Appendixes: Appendix {A}: {Linux}, {Windows NT}, {AIX}, {Solaris}; Appendix {B}: Compilers and Preprocessors, {MPI} Implementations, Development Environments, Debuggers, Performance Analyzers",
  journal = j-IJHPCA,
  volume = "15",
  number = "2",
  pages = "191--194",
  month = "Summer",
  year = "2001",
  CODEN = "IHPCFL",
  DOI = "https://doi.org/10.1177/109434200101500213",
  ISSN = "1094-3420
(print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Tue May 01 05:27:17 2001", bibsource = "https://www.math.utah.edu/pub/tex/bib/ijsa.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://journals.sagepub.com/doi/pdf/10.1177/109434200101500213", acknowledgement = ack-nhfb, fjournal = "International Journal of High Performance Computing Applications", journal-URL = "http://hpc.sagepub.com/content/by/year", xxmonth = may, } @Article{Anonymous:2001:EDP, author = "Anonymous", title = "Erratum: Design and Prototype of a Performance Tool Interface for {OpenMP}", journal = j-J-SUPERCOMPUTING, volume = "23", number = "1", pages = "105--128", month = may, year = "2001", CODEN = "JOSUED", DOI = "https://doi.org/10.1023/A:1015741304337", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Wed Jul 6 12:13:23 MDT 2005", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=23&issue=1; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=23&issue=1&spage=105", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Article{Baiardi:2001:CRD, author = "Fabrizio Baiardi and Paolo Mori and Laura Ricci", title = "Collecting Remote Data in Irregular Problems with Hierarchical Representation of the Domain", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "304--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310304.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310304.pdf", acknowledgement = ack-nhfb, 
fjournal = "Lecture Notes in Computer Science", } @Article{Banikazemi:2001:MLE, author = "Mohammad Banikazemi and Rama K. Govindaraju and Robert Blackmore and Dhabaleswar K. Panda", title = "{MPI-LAPI}: An Efficient Implementation of {MPI} for {IBM RS\slash 6000 SP} Systems", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "12", number = "10", pages = "1081--1093", month = oct, year = "2001", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/71.963419", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Sat Feb 23 09:26:03 MST 2002", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://dlib.computer.org/td/books/td2001/pdf/l1081.pdf; http://www.computer.org/tpds/td2001/l1081abs.htm", acknowledgement = ack-nhfb, fjournal = "IEEE Transactions on Parallel and Distributed Systems", journal-URL = "http://www.computer.org/tpds/archives.htm", } @Article{Baptista:2001:IOS, author = "Tiago Baptista and Hernani Pedroso and Jo{\~a}o Gabriel Silva", title = "The Implementation of One-Sided Communications for {WMPI II}", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "61--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310061.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310061.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Bencheva:2001:MPI, author = "G. 
Bencheva",
  title = "{MPI} Parallel Implementation of a Fast Separable Solver",
  journal = j-LECT-NOTES-COMP-SCI,
  volume = "2179",
  pages = "454--??",
  year = "2001",
  CODEN = "LNCSD9",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L = "0302-9743",
  bibdate = "Sat Feb 2 13:06:22 MST 2002",
  bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2179.htm; https://www.math.utah.edu/pub/tex/bib/lncs2001c.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2179/21790454.htm; http://link.springer-ny.com/link/service/series/0558/papers/2179/21790454.pdf",
  acknowledgement = ack-nhfb,
  fjournal = "Lecture Notes in Computer Science",
}

@Article{Berthou:2001:COH,
  author = "Jean-Yves Berthou and Eric Fayolle",
  title = "Comparing {OpenMP}, {HPF}, and {MPI} Programming: a Study Case",
  journal = j-IJHPCA,
  volume = "15",
  number = "3",
  pages = "297--309",
  month = "Fall",
  year = "2001",
  CODEN = "IHPCFL",
  ISSN = "1094-3420 (print), 1741-2846 (electronic)",
  ISSN-L = "1094-3420",
  bibdate = "Mon Nov 05 16:09:36 2001",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/fortran3.bib; https://www.math.utah.edu/pub/tex/bib/ijsa.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "International Journal of High Performance Computing Applications",
  journal-URL = "http://hpc.sagepub.com/content/by/year",
}

@Article{Bhandarkar:2001:ALB,
  author = "Milind Bhandarkar and L. V.
Kal{\'e} and Eric de Sturler and Jay Hoeflinger",
  title = "Adaptive Load Balancing for {MPI} Programs",
  journal = j-LECT-NOTES-COMP-SCI,
  volume = "2074",
  pages = "108--??",
  year = "2001",
  CODEN = "LNCSD9",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L = "0302-9743",
  bibdate = "Sat Feb 2 13:04:30 MST 2002",
  bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2074.htm; https://www.math.utah.edu/pub/tex/bib/lncs2001b.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2074/20740108.htm; http://link.springer-ny.com/link/service/series/0558/papers/2074/20740108.pdf",
  acknowledgement = ack-nhfb,
  fjournal = "Lecture Notes in Computer Science",
}

%%% NOTE: the citation key biewski:2001:MOS is a truncation of the first
%%% author's surname (Go{\l}{\k{e}}biewski); it is kept unchanged so that
%%% existing citations of this entry still resolve.
@Article{biewski:2001:MOS,
  author = "Maciej Go{\l}{\k{e}}biewski and Jesper Larsson Tr{\"a}ff",
  title = "{MPI-2} One-Sided Communications on a {Giganet SMP} Cluster",
  journal = j-LECT-NOTES-COMP-SCI,
  volume = "2131",
  pages = "16--??",
  year = "2001",
  CODEN = "LNCSD9",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L = "0302-9743",
  bibdate = "Fri Feb 1 08:13:55 MST 2002",
  bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310016.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310016.pdf",
  acknowledgement = ack-nhfb,
  fjournal = "Lecture Notes in Computer Science",
}

@Article{Blikberg:2001:NPA,
  author = "Ragnhild Blikberg and Tor S{\o}revik",
  title = "Nested parallelism: Allocation of threads to tasks and {OpenMP} implementation",
  journal = j-SCI-PROG,
  volume = "9",
  number = "2--3",
  pages = "185--194",
  month = "Spring--Summer",
  year = "2001",
  CODEN = "SCIPEV",
  ISSN = "1058-9244 (print), 1875-919X (electronic)",
  ISSN-L = "1058-9244",
  bibdate = "Thu Mar 28 12:27:27 MST 2002",
  bibsource = "Compendex database;
http://www.iospress.nl/site/html/10589244.html; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sciprogram.bib; OCLC Article1st database",
  URL = "http://iospress.metapress.com/app/home/contribution.asp%3Fwasp=7pab6qgbaf8vxg991rwy%26referrer=parent%26backto=issue%2C11%2C11%3Bjournal%2C1%2C9%3Blinkingpublicationresults%2C1%2C1",
  acknowledgement = ack-nhfb,
  fjournal = "Scientific Programming",
  journal-URL = "http://iospress.metapress.com/content/1058-9244",
}

@Article{Booth:2001:OML,
  author = "Stephen Booth",
  title = "Optimising the {MPI} Library for the {T3E}",
  journal = j-LECT-NOTES-COMP-SCI,
  volume = "2150",
  pages = "80--??",
  year = "2001",
  CODEN = "LNCSD9",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L = "0302-9743",
  bibdate = "Sat Feb 2 13:05:53 MST 2002",
  bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2150.htm; https://www.math.utah.edu/pub/tex/bib/lncs2001c.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2150/21500080.htm; http://link.springer-ny.com/link/service/series/0558/papers/2150/21500080.pdf",
  acknowledgement = ack-nhfb,
  fjournal = "Lecture Notes in Computer Science",
}

@Article{Bova:2001:PPM,
  author = "Steve W. Bova and Clay P.
Breshears and Henry Gabb and Bob Kuhn and Bill Magro and Rudolf Eigenmann and Greg Gaertner and Stefano Salvini and Howard Scott",
  title = "Parallel Programming with Message Passing and Directives",
  journal = j-COMPUT-SCI-ENG,
  volume = "3",
  number = "5",
  pages = "22--37",
  month = sep # "\slash " # oct,
  year = "2001",
  CODEN = "CSENFA",
  DOI = "https://doi.org/10.1109/5992.947105",
  ISSN = "1521-9615 (print), 1558-366X (electronic)",
  ISSN-L = "1521-9615",
  bibdate = "Sat Feb 23 06:37:33 MST 2002",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/computscieng.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://computer.org/cise/cs2001/c5022abs.htm; http://dlib.computer.org/cs/books/cs2001/pdf/c5022.pdf",
  acknowledgement = ack-nhfb,
  fjournal = "Computing in Science and Engineering",
  journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5992",
}

@Article{Brunst:2001:POL,
  author = "Holger Brunst and Hans-Christian Hoppe and Wolfgang E. Nagel and Manuela Winkler",
  title = "Performance Optimization for Large Scale Computing: The Scalable {VAMPIR} Approach",
  journal = j-LECT-NOTES-COMP-SCI,
  volume = "2074",
  pages = "751--??",
  year = "2001",
  CODEN = "LNCSD9",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L = "0302-9743",
  bibdate = "Sat Feb 2 13:04:30 MST 2002",
  bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2074.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2074/20740751.htm; http://link.springer-ny.com/link/service/series/0558/papers/2074/20740751.pdf",
  acknowledgement = ack-nhfb,
  fjournal = "Lecture Notes in Computer Science",
}

@Article{Bu:2001:PAC,
  author = "Libor Bu{\v{s}} and Pavel Tvrd{\'\i}k",
  title = "A Parallel Algorithm for Connected Components on Distributed Memory Machines",
  journal = j-LECT-NOTES-COMP-SCI,
  volume = "2131",
  pages = "280--??",
  year = "2001",
  CODEN = "LNCSD9",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L
= "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310280.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310280.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Bubak:2001:PMS, author = "Marian Bubak and W{\l}odzimierz Funika and Bartosz Bali and Roland Wism{\"u}ller", title = "Performance Measurement Support for {MPI} Applications with {PATOP}", journal = j-LECT-NOTES-COMP-SCI, volume = "1947", pages = "288--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Feb 2 13:02:51 MST 2002", bibsource = "file://sunset.math.utah.edu/a/suncore0/export/home/0073/sy/beebe/tex/bib/lncs2001a.bib; http://link.springer-ny.com/link/service/series/0558/tocs/t1947.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1947/19470288.htm; http://link.springer-ny.com/link/service/series/0558/papers/1947/19470288.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Bull:2001:MSO, author = "J. 
Mark Bull and Darragh O'Neill", title = "A microbenchmark suite for {OpenMP 2.0}", journal = j-COMP-ARCH-NEWS, volume = "29", number = "5", pages = "41--48", month = dec, year = "2001", CODEN = "CANED2", ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)", ISSN-L = "0163-5964", bibdate = "Fri May 12 09:41:22 MDT 2006", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGARCH Computer Architecture News", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J89", } @Article{Cappello:2001:UPS, author = "Franck Cappello and Olivier Richard and Daniel Etiemble", title = "Understanding performance of {SMP} clusters running {MPI} programs", journal = j-FUT-GEN-COMP-SYS, volume = "17", number = "6", pages = "711--720", month = apr, year = "2001", CODEN = "FGSEVI", ISSN = "0167-739X (print), 1872-7115 (electronic)", ISSN-L = "0167-739X", bibdate = "Wed Feb 27 12:41:21 MST 2002", bibsource = "http://www.elsevier.com/locate/issn/0167739X; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.elsevier.com/gej-ng/10/19/19/45/33/30/abstract.html", acknowledgement = ack-nhfb, fjournal = "Future Generation Computer Systems", journal-URL = "http://www.sciencedirect.com/science/journal/0167739X", } @Article{Caubet:2001:DTM, author = "Jordi Caubet and Judit Gimenez and Jesus Labarta and Luiz DeRose", title = "A Dynamic Tracing Mechanism for Performance Analysis of {OpenMP} Applications", journal = j-LECT-NOTES-COMP-SCI, volume = "2104", pages = "53--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Feb 2 13:05:04 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2104.htm; https://www.math.utah.edu/pub/tex/bib/lncs2001b.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2104/21040053.htm; 
http://link.springer-ny.com/link/service/series/0558/papers/2104/21040053.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Book{Chandra:2001:PPO, author = "Rohit Chandra and Leonardo Dagum and David Kohr and Dror Maydan and Jeff McDonald and Ramesh Menon", title = "Parallel Programming in {OpenMP}", publisher = pub-MORGAN-KAUFMANN, address = pub-MORGAN-KAUFMANN:adr, pages = "xvi + 230", year = "2001", ISBN = "1-55860-671-8", ISBN-13 = "978-1-55860-671-5", LCCN = "QA76.642 .P38 2001", bibdate = "Thu Jul 14 11:09:17 2005", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/unix.bib", price = "US\$39.95", URL = "http://www.mkp.com/books_catalog/catalog.asp?ISBN=1-55860-671-8", abstract = "The rapid and widespread acceptance of shared memory multiprocessor architectures has created a pressing demand for an efficient way to program these systems. At the same time, developers of technical and scientific applications in industry and in government laboratories find they need to parallelize huge volumes of code in a portable fashion. OpenMP, developed jointly by several parallel computing vendors to address these issues, is an industry-wide standard for programming shared-memory and distributed shared-memory multiprocessors. It consists of a set of compiler directives and library routines that extend FORTRAN, C, and C++ codes to express shared-memory parallelism. Parallel Programming in OpenMP is the first book to teach both the novice and expert parallel programmers how to program using this new standard. 
The authors, who helped design and implement OpenMP while at SGI, bring a depth and breadth to the book as compiler writers, application developers, and performance engineers.", acknowledgement = ack-nhfb, keywords = "parallel programming (computer science)", tableofcontents = "Foreword \\ Preface \\ 1: Introduction \\ Performance with OpenMP \\ A first glimpse of OpenMP \\ The OpenMP parallel computer \\ Why OpenMP \\ History of OpenMP \\ Navigating the rest of the book \\ 2: Getting started with OpenMP \\ 3: Exploiting loop-level parallelism \\ Meaning of the parallel do directive \\ Controlling data sharing \\ Removing data dependences \\ Enhancing performance \\ 4: Beyond loop-level parallelism, parallel regions \\ 5: Synchronization \\ 6: Performance", } @Article{Chapman:2001:PDE, author = "B. Chapman and O. Hernandez and A. Patil and A. Prabhakar", title = "Program Development Environment for {OpenMP} Programs on {ccNUMA} Architectures", journal = j-LECT-NOTES-COMP-SCI, volume = "2179", pages = "210--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Feb 2 13:06:22 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2179.htm; https://www.math.utah.edu/pub/tex/bib/lncs2001c.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2179/21790210.htm; http://link.springer-ny.com/link/service/series/0558/papers/2179/21790210.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Chen:2001:FFT, author = "Qun Chen and Michael C. 
Ferris", title = "{FATCOP}: a Fault Tolerant {Condor--PVM} Mixed Integer Programming Solver", journal = j-SIAM-J-OPT, volume = "11", number = "4", pages = "1019--1036", month = mar # "\slash " # may, year = "2001", CODEN = "SJOPE8", DOI = "https://doi.org/10.1137/S1052623499353911", ISSN = "1052-6234 (print), 1095-7189 (electronic)", ISSN-L = "1052-6234", MRclass = "90C11 (65K05)", MRnumber = "MR1855219 (2002f:90068)", bibdate = "Sat Oct 4 12:16:05 MDT 2003", bibsource = "http://epubs.siam.org/sam-bin/dbq/toc/SIOPT/11/4; https://www.math.utah.edu/pub/tex/bib/pvm.bib; MathSciNet database", URL = "http://epubs.siam.org/sam-bin/dbq/article/35391", acknowledgement = ack-nhfb, fjournal = "SIAM Journal on Optimization", journal-URL = "http://epubs.siam.org/siopt", } @Article{Chen:2001:TMK, author = "Yu Chen and Qian Fang and Zhihui Du and Sanli Li", title = "{TH-MPI}: {OS} Kernel Integrated Fault Tolerant {MPI}", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "75--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310075.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310075.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Czarnul:2001:DPD, author = "Pawel Czarnul and Karen Tomko and Henryk Krawczyk", title = "Dynamic Partitioning of the Divide-and-Conquer Scheme with Migration in {PVM} Environment", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "174--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; 
https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310174.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310174.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Darema:2001:SMP, author = "Frederica Darema", title = "The {SPMD} Model: Past, Present and Future", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "1--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310001.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310001.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Dehne:2001:CPD, author = "Frank Dehne and Todd Eavis and Andrew Rau-Chaplin", title = "Computing Partial Data Cubes for Parallel Data Warehousing Applications", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "319--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310319.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310319.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Demaine:2001:GCM, author = "E. D. Demaine and I. Foster and C. Kesselman and M. 
Snir", title = "Generalized Communicators in the Message Passing Interface", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "12", number = "6", pages = "610--616", month = jun, year = "2001", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/71.932714", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Fri Jul 20 11:51:59 MDT 2001", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://dlib.computer.org/td/books/td2001/pdf/l0610.pdf; http://www.computer.org/tpds/td2001/l0610abs.htm", acknowledgement = ack-nhfb, fjournal = "IEEE Transactions on Parallel and Distributed Systems", journal-URL = "http://www.computer.org/tpds/archives.htm", } @Article{Denis:2001:THP, author = "Alexandre Denis and Christian P{\'e}rez and Thierry Priol", title = "Towards High Performance {CORBA} and {MPI} Middlewares for Grid Computing", journal = j-LECT-NOTES-COMP-SCI, volume = "2242", pages = "14--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Sep 10 19:09:01 MDT 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2242.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2242/22420014.htm; http://link.springer-ny.com/link/service/series/0558/papers/2242/22420014.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{DiMartino:2001:WDS, author = "Beniamino {Di Martino} and Sergio Briguglio and Gregorio Vlad and Giuliana Fogaccia", title = "Workload decomposition strategies for shared memory parallel systems with {OpenMP}", journal = j-SCI-PROG, volume = "9", number = "2--3", pages = "109--122", month = "Spring--Summer", year = "2001", CODEN = "SCIPEV", ISSN = "1058-9244 (print), 1875-919X (electronic)", ISSN-L = "1058-9244", bibdate = "Thu Mar 28 12:27:27 MST 2002", bibsource = "Compendex database; 
http://www.iospress.nl/site/html/10589244.html; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sciprogram.bib; OCLC Article1st database", URL = "http://iospress.metapress.com/app/home/contribution.asp%3Fwasp=7pab6qgbaf8vxg991rwy%26referrer=parent%26backto=issue%2C5%2C11%3Bjournal%2C1%2C9%3Blinkingpublicationresults%2C1%2C1", acknowledgement = ack-nhfb, fjournal = "Scientific Programming", journal-URL = "http://iospress.metapress.com/content/1058-9244", } @Article{Elwasif:2001:AMT, author = "Wael R. Elwasif and David E. Bernholdt and James A. Kohl and G. A. Geist", title = "An Architecture for a Multi-threaded Harness Kernel", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "126--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310126.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310126.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Fagg:2001:FTM, author = "Graham E. Fagg and Antonin Bukovsky and Jack J. 
Dongarra", title = "Fault Tolerant {MPI} for the {HARNESS} Meta-computing System", journal = j-LECT-NOTES-COMP-SCI, volume = "2073", pages = "355--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Feb 2 13:04:28 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2073.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2073/20730355.htm; http://link.springer-ny.com/link/service/series/0558/papers/2073/20730355.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Fagg:2001:HFT, author = "Graham E. Fagg and Antonin Bukovsky and Jack J. Dongarra", title = "{HARNESS} and fault tolerant {MPI}", journal = j-PARALLEL-COMPUTING, volume = "27", number = "11", pages = "1479--1495", month = oct, year = "2001", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Fri Feb 22 16:52:42 MST 2002", bibsource = "http://www.elsevier.com/locate/issn/01678191; https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.elsevier.com/gej-ng/10/35/21/47/41/32/abstract.html; http://www.elsevier.nl/gej-ng/10/35/21/47/41/32/article.pdf; http://www.netlib.org/utk/people/JackDongarra/PAPERS/harness-ftmpi-pc.pdf", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Fagg:2001:PIS, author = "Graham E. Fagg and Edgar Gabriel and Michael Resch and Jack J. 
Dongarra", title = "Parallel {IO} Support for Meta-computing Applications: {MPI\_Connect IO} Applied to {PACX--MPI}", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "135--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310135.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310135.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Ferschweiler:2001:CDP, author = "Ken Ferschweiler and Mariacarla Calzarossa and Cherri Pancake and Daniele Tessera and Dylan Keon", title = "A Community Databank for Performance Tracefiles", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "233--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310233.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310233.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Field:2001:RTF, author = "Antony J. Field and Thomas L. Hansen and Paul H. J. 
Kelly", title = "Run-Time Fusion of {MPI} Calls in a Parallel {C++} Library", journal = j-LECT-NOTES-COMP-SCI, volume = "2017", pages = "363--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Feb 2 13:03:35 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2017.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2017/20170363.htm; http://link.springer-ny.com/link/service/series/0558/papers/2017/20170363.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Fischer:2001:DNM, author = "Markus Fischer and Peter Kemper", title = "Distributed Numerical {Markov} Chain Analysis", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "272--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310272.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310272.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Fischer:2001:SAN, author = "Markus Fischer", title = "System Area Network Extensions to the Parallel Virtual Machine", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "98--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310098.htm; 
http://link.springer-ny.com/link/service/series/0558/papers/2131/21310098.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Friedel:2001:HMC, author = "Peter Friedel and J{\"o}rg Bergmann and Stephan Seidl and Wolfgang E. Nagel", title = "An Hierarchical {MPI} Communication Model for the Parallelized Solution of Multiple Integrals", journal = j-LECT-NOTES-COMP-SCI, volume = "2110", pages = "474--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Feb 2 13:05:11 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2110.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2110/21100474.htm; http://link.springer-ny.com/link/service/series/0558/papers/2110/21100474.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Gaito:2001:ADC, author = "A. Gaito and M. Rak and U. Villano", title = "Adding Dynamic Coscheduling Support to {PVM}", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "106--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310106.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310106.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Gallud:2001:EDF, author = "J. A. Gallud and J. Garc{\'\i}a-Consuegra and J. M. Garc{\'\i}a and L. 
Orozco",
  title =        "Evaluating the {DIPORSI} Framework: Distributed
                 Processing of Remotely Sensed Imagery",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "2131",
  pages =        "401--??",
  year =         "2001",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Fri Feb 1 08:13:55 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310401.htm;
                 http://link.springer-ny.com/link/service/series/0558/papers/2131/21310401.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}

@Article{Geist:2001:BFN,
  author =       "G. Al Geist",
  title =        "Building a Foundation for the Next {PVM}:
                 {Petascale Virtual Machines}",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "2131",
  pages =        "2--??",
  year =         "2001",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Fri Feb 1 08:13:55 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310002.htm;
                 http://link.springer-ny.com/link/service/series/0558/papers/2131/21310002.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}

@Article{Gerlach:2001:IOJ,
  author =       "Jens Gerlach and Zheng-Yu Jiang and Hans-Werner Pohl",
  title =        "Integrating {OpenMP} into {Janus}",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "2104",
  pages =        "101--??",
  year =         "2001",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Sat Feb 2 13:05:04 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t2104.htm;
                 https://www.math.utah.edu/pub/tex/bib/lncs2001b.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer-ny.com/link/service/series/0558/bibs/2104/21040101.htm;
                 http://link.springer-ny.com/link/service/series/0558/papers/2104/21040101.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}

@Article{Gine:2001:MMM,
  author =       "Francesc Gin{\'e} and Francesc Solsona and Xavi
                 Navarro and Porfidio Hern{\'a}ndez and Emilio Luque",
  title =        "{MemTo}: a Memory Monitoring Tool for a {Linux}
                 Cluster",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "2131",
  pages =        "225--??",
  year =         "2001",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Fri Feb 1 08:13:55 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310225.htm;
                 http://link.springer-ny.com/link/service/series/0558/papers/2131/21310225.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}

%%% NOTE(review): the first author's surname was recorded as
%%% ``Go{\l}biewski'', which looks like a name that lost its e-ogonek on
%%% import.  It is restored here as ``Go{\l}{\k{e}}biewski''; please
%%% confirm against the LNCS volume 2131 table of contents.  The \k
%%% accent needs a font encoding (or a preamble fallback) that provides
%%% the ogonek.  The citation key is left unchanged so that existing
%%% \cite commands keep working.
@Article{Golbiewski:2001:MOS,
  author =       "Maciej Go{\l}{\k{e}}biewski and Jesper Larsson
                 Tr{\"a}ff",
  title =        "{MPI-2} One-Sided Communications on a
                 {Giganet SMP} Cluster",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "2131",
  pages =        "16--??",
  year =         "2001",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Fri Feb 1 08:13:55 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310016.htm;
                 http://link.springer-ny.com/link/service/series/0558/papers/2131/21310016.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}

@Article{Gonzalez:2001:DSP,
  author =       "M. Gonzalez and E. Ayguad{\'e} and X. Martorell and J.
Labarta", title = "Defining and Supporting Pipelined Executions in {OpenMP}", journal = j-LECT-NOTES-COMP-SCI, volume = "2104", pages = "155--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Feb 2 13:05:04 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2104.htm; https://www.math.utah.edu/pub/tex/bib/lncs2001b.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2104/21040155.htm; http://link.springer-ny.com/link/service/series/0558/papers/2104/21040155.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Gonzalez:2001:MIM, author = "J. A. Gonz{\'a}lez and C. Le{\'o}n and C. Rodr{\'\i}guez and F. Sande", title = "A Model to Integrate Message Passing and Shared Memory Programming", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "114--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310114.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310114.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Gonzalez:2001:OET, author = "Marc Gonzalez and Jose Oliver and Xavier Martorell and Eduard Ayguade and Jesus Labarta and Nacho Navarro", title = "{OpenMP} Extensions for Thread Groups and Their Run-Time Support", journal = j-LECT-NOTES-COMP-SCI, volume = "2017", pages = "324--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Feb 2 13:03:35 MST 2002", bibsource = 
"http://link.springer-ny.com/link/service/series/0558/tocs/t2017.htm; https://www.math.utah.edu/pub/tex/bib/lncs2001a.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2017/20170324.htm; http://link.springer-ny.com/link/service/series/0558/papers/2017/20170324.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Gorzig:2001:CCP, author = "Steffen G{\"o}rzig", title = "{CPPvm} --- {C++} and {PVM}", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "83--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310083.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310083.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Gropp:2001:CSA, author = "William D. Gropp", title = "Challenges and Successes in Achieving the Potential of {MPI}", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "7--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310007.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310007.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Gropp:2001:LSM, author = "William D. 
Gropp", title = "Learning from the Success of {MPI}", journal = j-LECT-NOTES-COMP-SCI, volume = "2228", pages = "81--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Feb 2 13:07:14 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2228.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2228/22280081.htm; http://link.springer-ny.com/link/service/series/0558/papers/2228/22280081.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Hoeflinger:2001:IPV, author = "Jay Hoeflinger and Bob Kuhn and Wolfgang Nagel and Paul Petersen and Hrabri Rajic and Sanjiv Shah and Jeff Vetter and Michael Voss and Renee Woo", title = "An Integrated Performance Visualizer for {MPI\slash OpenMP} Programs", journal = j-LECT-NOTES-COMP-SCI, volume = "2104", pages = "40--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Feb 2 13:05:04 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2104.htm; https://www.math.utah.edu/pub/tex/bib/lncs2001b.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2104/21040040.htm; http://link.springer-ny.com/link/service/series/0558/papers/2104/21040040.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Hoeflinger:2001:PSP, author = "Jay Hoeflinger and Prasad Alavilli and Thomas Jackson and Bob Kuhn", title = "Producing scalable performance with {OpenMP}: {Experiments} with two {CFD} applications", journal = j-PARALLEL-COMPUTING, volume = "27", number = "4", pages = "391--413", month = mar, year = "2001", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Wed Jul 18 06:31:15 MDT 
2001", bibsource = "http://www.elsevier.com/locate/issn/01678191; https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.elsevier.nl/gej-ng/10/35/21/47/28/26/abstract.html; http://www.elsevier.nl/gej-ng/10/35/21/47/28/26/article.pdf", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Hu:2001:PCC, author = "Hong Hu and Edward L. Turner", title = "Parallel {CFD} Computing Using Shared Memory {OpenMP}", journal = j-LECT-NOTES-COMP-SCI, volume = "2073", pages = "1137--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Feb 2 13:04:28 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2073.htm; https://www.math.utah.edu/pub/tex/bib/lncs2001b.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2073/20731137.htm; http://link.springer-ny.com/link/service/series/0558/papers/2073/20731137.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Huband:2001:DTB, author = "Simon Huband and Chris McDonald", title = "{DEPICT}: a Topology-Based Debugger for {MPI} Programs", journal = j-LECT-NOTES-COMP-SCI, volume = "2026", pages = "109--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Feb 2 13:03:43 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2026.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2026/20260109.htm; http://link.springer-ny.com/link/service/series/0558/papers/2026/20260109.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Huse:2001:LST, author = "Lars Paul Huse", title = 
"Layering {SHMEM} on Top of {MPI}", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "44--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310044.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310044.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Ilroy:2001:IMP, author = "Jonathan Ilroy and Cyrille Randriamaro and Gil Utard", title = "Improving {MPI-I/O} Performance on {PVFS}", journal = j-LECT-NOTES-COMP-SCI, volume = "2150", pages = "911--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Feb 2 13:05:53 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2150.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2150/21500911.htm; http://link.springer-ny.com/link/service/series/0558/papers/2150/21500911.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Iwama:2001:PLS, author = "Kazuo Iwama and Daisuke Kawai and Shuichi Miyazaki and Yasuo Okabe and Jun Umemoto", title = "Parallelizing Local Search for {CNF} Satisfiability Using Vectorization and {PVM}", journal = j-LECT-NOTES-COMP-SCI, volume = "1982", pages = "123--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Feb 2 13:03:03 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1982.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1982/19820123.htm; 
http://link.springer-ny.com/link/service/series/0558/papers/1982/19820123.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Jorba:2001:SFF, author = "Josep Jorba and Tom{\`a}s Margalef and Emilio Luque", title = "Simulation of Forest Fire Propagation on Parallel {\&} Distributed {PVM} Platforms", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "386--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310386.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310386.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Kaiser:2001:OCC, author = "Timothy H. Kaiser and Scott B. Baden", title = "Overlapping communication and computation with {OpenMP} and {MPI}", journal = j-SCI-PROG, volume = "9", number = "2--3", pages = "73--81", month = "Spring--Summer", year = "2001", CODEN = "SCIPEV", ISSN = "1058-9244 (print), 1875-919X (electronic)", ISSN-L = "1058-9244", bibdate = "Thu Mar 28 12:27:27 MST 2002", bibsource = "Compendex database; http://www.iospress.nl/site/html/10589244.html; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sciprogram.bib; OCLC Article1st database", URL = "http://iospress.metapress.com/app/home/contribution.asp%3Fwasp=7pab6qgbaf8vxg991rwy%26referrer=parent%26backto=issue%2C2%2C11%3Bjournal%2C1%2C9%3Blinkingpublicationresults%2C1%2C1", acknowledgement = ack-nhfb, fjournal = "Scientific Programming", journal-URL = "http://iospress.metapress.com/content/1058-9244", } @Article{Kambites:2001:OLI, author = "M. E. Kambites and J. Obdr{\v{z}}{\'a}lek and J. M. 
Bull", title = "An {OpenMP}-like interface for parallel programming in {Java}", journal = j-CCPE, volume = "13", number = "8--9", pages = "793--814", month = jul # "\slash " # aug, year = "2001", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.579", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Wed Jul 25 10:55:47 MDT 2001", bibsource = "http://www.interscience.wiley.com/jpages/1532-0626; https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www3.interscience.wiley.com/journalfinder.html", URL = "http://www3.interscience.wiley.com/cgi-bin/abstract/84503220/START; http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=84503220&PLACEBO=IE.pdf", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Prac\-tice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", } @Article{Kasahara:2001:ACG, author = "Hironori Kasahara and Motoki Obata and Kazuhisa Ishizaka", title = "Automatic Coarse Grain Task Parallel Processing on {SMP} Using {OpenMP}", journal = j-LECT-NOTES-COMP-SCI, volume = "2017", pages = "189--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Feb 2 13:03:35 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2017.htm; https://www.math.utah.edu/pub/tex/bib/lncs2001a.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2017/20170189.htm; http://link.springer-ny.com/link/service/series/0558/papers/2017/20170189.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Kobler:2001:DOP, author = "Rene Kobler and Dieter Kranzlm{\"u}ller and Jens Volkert", title = "Debugging {OpenMP} Programs Using Event Manipulation", journal = j-LECT-NOTES-COMP-SCI, volume = "2104", pages = "81--??", year = "2001", CODEN = "LNCSD9", ISSN = 
"0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Feb 2 13:05:04 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2104.htm; https://www.math.utah.edu/pub/tex/bib/lncs2001b.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2104/21040081.htm; http://link.springer-ny.com/link/service/series/0558/papers/2104/21040081.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Konstantinou:2001:TTO, author = "Dimitris Konstantinou and Nectarios Koziris and George Papakonstantinou", title = "{TOPPER}: a Tool for Optimizing the Performance of Parallel Applications", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "148--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310148.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310148.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Kranzlmuller:2001:IRM, author = "Dieter Kranzlm{\"u}ller and Christian Schaubschl{\"a}ger and Jens Volkert", title = "An Integrated Record{\&}Replay Mechanism for Nondeterministic Message Passing Programs", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "192--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310192.htm; 
http://link.springer-ny.com/link/service/series/0558/papers/2131/21310192.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Krawczyk:2001:PIM, author = "Henryk Krawczyk and Jamil Saif", title = "Parallel Image Matching on {PC} Cluster", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "312--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310312.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310312.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Kucukboyaci:2001:PPT, author = "Vefa Kucukboyaci and Alireza Haghighat and Glenn E. Sjoden", title = "Performance of {PENTRAN TM} {$3$-D} Parallel Particle Transport Code on the {IBM SP2} and {PCTRAN} Cluster", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "36--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310036.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310036.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Kusano:2001:OOC, author = "Kazuhiro Kusano and Mitsuhisa Sato and Takeo Hosomi and Yoshiki Seo", title = "The {Omni OpenMP} Compiler on the Distributed Shared Memory of {Cenju-4}", journal = j-LECT-NOTES-COMP-SCI, volume = "2104", pages = "20--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 
1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Feb 2 13:05:04 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2104.htm; https://www.math.utah.edu/pub/tex/bib/lncs2001b.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2104/21040020.htm; http://link.springer-ny.com/link/service/series/0558/papers/2104/21040020.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Labarta:2001:NOD, author = "J. Labarta and J. Oliver and D. S. Henty and Eduard Ayguad{\'e}", title = "New {OpenMP} directives for irregular data access loops", journal = j-SCI-PROG, volume = "9", number = "2--3", pages = "175--183", month = "Spring--Summer", year = "2001", CODEN = "SCIPEV", ISSN = "1058-9244 (print), 1875-919X (electronic)", ISSN-L = "1058-9244", bibdate = "Thu Mar 28 12:27:27 MST 2002", bibsource = "Compendex database; http://www.iospress.nl/site/html/10589244.html; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sciprogram.bib; OCLC Article1st database", URL = "http://iospress.metapress.com/app/home/contribution.asp%3Fwasp=7pab6qgbaf8vxg991rwy%26referrer=parent%26backto=issue%2C10%2C11%3Bjournal%2C1%2C9%3Blinkingpublicationresults%2C1%2C1", acknowledgement = ack-nhfb, fjournal = "Scientific Programming", journal-URL = "http://iospress.metapress.com/content/1058-9244", xxauthor = "J. Labarta and E. Ayguad{\'e} and J. 
Oliver and others", } @Article{Laforenza:2001:PHP, author = "Domenico Laforenza", title = "Programming High Performance Applications in Grid Environments", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "8--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310008.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310008.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Lee:2001:APT, author = "D. J. Lee and T. J. Downar", title = "The Application of {POSIX} Threads and {OpenMP} to the {U.S. NRC} Neutron Kinetics Code {PARCS}", journal = j-LECT-NOTES-COMP-SCI, volume = "2104", pages = "90--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Feb 2 13:05:04 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2104.htm; https://www.math.utah.edu/pub/tex/bib/lncs2001b.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2104/21040090.htm; http://link.springer-ny.com/link/service/series/0558/papers/2104/21040090.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Li:2001:PCS, author = "Michael Na Li and A. J. 
Rossini", title = "\pkg{RPVM}: Cluster Statistical Computing in {R}", journal = j-R-NEWS, volume = "1", number = "3", pages = "4--7", month = sep, year = "2001", CODEN = "????", ISSN = "1609-3631", ISSN-L = "1609-3631", bibdate = "Thu Aug 13 09:25:10 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/rjournal.bib", URL = "http://CRAN.R-project.org/doc/Rnews/", acknowledgement = ack-r-project, fjournal = "R News: the Newsletter of the R Project", journal-URL = "http://journal.r-project.org/", pdf = Rnews2001-3, } @Article{Li:2001:WMB, author = "Maozhen Li and Omer F. Rana and David W. Walker", title = "Wrapping {MPI}-based legacy codes as {Java\slash CORBA} components", journal = j-FUT-GEN-COMP-SYS, volume = "18", number = "2", pages = "213--223", month = oct, year = "2001", CODEN = "FGSEVI", ISSN = "0167-739X (print), 1872-7115 (electronic)", ISSN-L = "0167-739X", bibdate = "Wed Feb 27 12:41:22 MST 2002", bibsource = "http://www.elsevier.com/locate/issn/0167739X; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.elsevier.com/gej-ng/10/19/19/60/31/29/abstract.html", acknowledgement = ack-nhfb, fjournal = "Future Generation Computer Systems", journal-URL = "http://www.sciencedirect.com/science/journal/0167739X", } @Article{Luecke:2001:SPO, author = "Glenn R. 
Luecke and Wei-Hua Lin", title = "Scalability and performance of {OpenMP} and {MPI} on a 128-processor {SGI Origin 2000}", journal = j-CCPE, volume = "13", number = "10", pages = "905--928", day = "25", month = aug, year = "2001", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.588", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Mon Feb 25 14:51:23 MST 2002", bibsource = "http://www.interscience.wiley.com/jpages/1532-0626; https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www3.interscience.wiley.com/journalfinder.html", URL = "http://www3.interscience.wiley.com/cgi-bin/abstract/85007180/START; http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=85007180&PLACEBO=IE.pdf", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Prac\-tice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", } @Article{Luo:2001:PDE, author = "Jun Luo and Sanguthevar Rajasekaran and Chenxia Qiu", title = "Parallizing $1$-Dimensional Estuarine Model", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "257--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310257.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310257.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Macias:2001:PPA, author = "Elsa M. Mac{\'\i}as and Alvaro Su{\'a}rez and C. N. Ojeda-Guerra and E. 
Robayna", title = "Programming Parallel Applications with {LAMGAC} in a {LAN--WLAN} Environment", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "158--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310158.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310158.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Malfetti:2001:AOW, author = "Paolo Malfetti", title = "Application of {OpenMP} to weather, wave and ocean codes", journal = j-SCI-PROG, volume = "9", number = "2--3", pages = "99--107", month = "Spring--Summer", year = "2001", CODEN = "SCIPEV", ISSN = "1058-9244 (print), 1875-919X (electronic)", ISSN-L = "1058-9244", bibdate = "Thu Mar 28 12:27:27 MST 2002", bibsource = "Compendex database; http://www.iospress.nl/site/html/10589244.html; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sciprogram.bib; OCLC Article1st database", URL = "http://iospress.metapress.com/app/home/contribution.asp%3Fwasp=7pab6qgbaf8vxg991rwy%26referrer=parent%26backto=issue%2C4%2C11%3Bjournal%2C1%2C9%3Blinkingpublicationresults%2C1%2C1", acknowledgement = ack-nhfb, fjournal = "Scientific Programming", journal-URL = "http://iospress.metapress.com/content/1058-9244", } @Article{Manis:2001:PNP, author = "G. 
Manis", title = "Persistent and Non-persistent Data Objects on Top of {PVM} and {MPI}", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "91--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310091.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310091.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Matthey:2001:EMO, author = "T. Matthey and J. P. Hansen", title = "Evaluation of {MPI}'s One-Sided Communication Mechanism for Short-Range Molecular Dynamics on the {Origin2000}", journal = j-LECT-NOTES-COMP-SCI, volume = "1947", pages = "356--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Feb 2 13:02:51 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1947.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1947/19470356.htm; http://link.springer-ny.com/link/service/series/0558/papers/1947/19470356.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Mattson:2001:EO, author = "Timothy Mattson", title = "The Evolution of {OpenMP}", journal = j-LECT-NOTES-COMP-SCI, volume = "1947", pages = "19--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Feb 2 13:02:51 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1947.htm; https://www.math.utah.edu/pub/tex/bib/lncs2001a.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = 
"http://link.springer-ny.com/link/service/series/0558/bibs/1947/19470019.htm; http://link.springer-ny.com/link/service/series/0558/papers/1947/19470019.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Matuszek:2001:APS, author = "Mariusz R. Matuszek", title = "Assessment of {PVM} Suitability to Testbed Client-Agent-Server Applications", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "69--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310069.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310069.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Michailidis:2001:TSH, author = "Panagiotis D. Michailidis and Konstantinos G. 
Margaritis", title = "Text Searching on a Heterogeneous Cluster of Workstations", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "378--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310378.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310378.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Min:2001:PCO, author = "Seung Jai Min and Seon Wook Kim and Michael Voss and Sang Ik Lee and Rudolf Eigenmann", title = "Portable Compilers for {OpenMP}", journal = j-LECT-NOTES-COMP-SCI, volume = "2104", pages = "11--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Feb 2 13:05:04 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2104.htm; https://www.math.utah.edu/pub/tex/bib/lncs2001b.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2104/21040011.htm; http://link.springer-ny.com/link/service/series/0558/papers/2104/21040011.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Moore:2001:RPA, author = "Shirley Moore and David Cronk and Kevin London and Jack Dongarra", title = "Review of Performance Analysis Tools for {MPI} Parallel Programs", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "241--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; 
https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310241.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310241.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Moreno:2001:AEP, author = "Luz Marina Moreno and Francisco Almeida and Daniel Gonz{\'a}lez and Casiano Rodr{\'\i}guez", title = "Adaptive Execution of Pipelines", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "217--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310217.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310217.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Muller:2001:SSO, author = "Matthias M{\"u}ller", title = "Some Simple {OpenMP} Optimization Techniques", journal = j-LECT-NOTES-COMP-SCI, volume = "2104", pages = "31--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Feb 2 13:05:04 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2104.htm; https://www.math.utah.edu/pub/tex/bib/lncs2001b.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2104/21040031.htm; http://link.springer-ny.com/link/service/series/0558/papers/2104/21040031.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Neophytou:2001:NDW, author = "Neophytos Neophytou and Paraskevas Evripidou", title = "{Net-dbx}: a {Web}-Based Debugger of {MPI} Programs Over Low-Bandwidth Lines", journal = 
j-IEEE-TRANS-PAR-DIST-SYS, volume = "12", number = "9", pages = "986--995", month = sep, year = "2001", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/71.954636", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Sat Feb 23 09:26:03 MST 2002", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://dlib.computer.org/td/books/td2001/pdf/l0986.pdf; http://www.computer.org/tpds/td2001/l0986abs.htm", acknowledgement = ack-nhfb, fjournal = "IEEE Transactions on Parallel and Distributed Systems", journal-URL = "http://www.computer.org/tpds/archives.htm", } @Article{Nicolescu:2001:DTP, author = "Cristina Nicolescu and Pieter Jonker", title = "A Data and Task Parallel Image Processing Environment", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "393--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310393.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310393.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Nikolopoulos:2001:EMA, author = "D. S. Nikolopoulos and E. Artiaga and E. Ayguad{\'e} and J. 
Labarta", title = "Exploiting memory affinity in {OpenMP} through schedule reuse", journal = j-COMP-ARCH-NEWS, volume = "29", number = "5", pages = "49--55", month = dec, year = "2001", CODEN = "CANED2", ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)", ISSN-L = "0163-5964", bibdate = "Fri May 12 09:41:22 MDT 2006", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIGARCH Computer Architecture News", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J89", } @Article{Nikolopoulos:2001:SID, author = "Dimitrios S. Nikolopoulos and Eduard Ayguad{\'e}", title = "A Study of Implicit Data Distribution Methods for {OpenMP} Using the {SPEC} Benchmarks", journal = j-LECT-NOTES-COMP-SCI, volume = "2104", pages = "115--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Feb 2 13:05:04 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2104.htm; https://www.math.utah.edu/pub/tex/bib/lncs2001b.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2104/21040115.htm; http://link.springer-ny.com/link/service/series/0558/papers/2104/21040115.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Okulicka-Dluzewska:2001:PFE, author = "Felicja Okulicka-D{\l}uzewska", title = "Parallelization of Finite Element Package by {MPI} Library", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "427--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310427.htm; 
http://link.springer-ny.com/link/service/series/0558/papers/2131/21310427.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Ong:2001:SUC, author = "Emil Ong and Ewing Lusk and William Gropp", title = "Scalable {Unix} Commands for Parallel Processors: a High-Performance Implementation", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "410--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310410.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310410.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Pagourtzis:2001:PCT, author = "Aris Pagourtzis and Igor Potapov and Wojciech Rytter", title = "{PVM} Computation of the Transitive Closure: The Dependency Graph Approach", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "249--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310249.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310249.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Papadopoulos:2001:NRC, author = "Philip M. Papadopoulos and Mason J. 
Katz and Greg Bruno", title = "{NPACI} Rocks Clusters: Tools for Easily Deploying and Maintaining Manageable High-Performance {Linux} Clusters", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "10--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310010.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310010.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Park:2001:CSL, author = "So-Hee Park and Mi-Young Park and Yong-Kee Jun", title = "A Comparison of Scalable Labeling Schemes for Detecting Races in {OpenMP} Programs", journal = j-LECT-NOTES-COMP-SCI, volume = "2104", pages = "68--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Feb 2 13:05:04 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2104.htm; https://www.math.utah.edu/pub/tex/bib/lncs2001b.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2104/21040068.htm; http://link.springer-ny.com/link/service/series/0558/papers/2104/21040068.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Park:2001:PPE, author = "Insung Park and Michael J. 
Voss and Seon Wook Kim and Rudolf Eigenmann", title = "Parallel programming environment for {OpenMP}", journal = j-SCI-PROG, volume = "9", number = "2--3", pages = "143--161", month = "Spring--Summer", year = "2001", CODEN = "SCIPEV", ISSN = "1058-9244 (print), 1875-919X (electronic)", ISSN-L = "1058-9244", bibdate = "Thu Mar 28 12:27:27 MST 2002", bibsource = "Compendex database; http://www.iospress.nl/site/html/10589244.html; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sciprogram.bib; OCLC Article1st database", URL = "http://iospress.metapress.com/app/home/contribution.asp%3Fwasp=7pab6qgbaf8vxg991rwy%26referrer=parent%26backto=issue%2C8%2C11%3Bjournal%2C1%2C9%3Blinkingpublicationresults%2C1%2C1", acknowledgement = ack-nhfb, fjournal = "Scientific Programming", journal-URL = "http://iospress.metapress.com/content/1058-9244", xxpages = "143--162", } @Article{Pears:2001:DLB, author = "Arnold N. Pears and Nicola Thong", title = "A Dynamic Load Balancing Architecture for {PDES} Using {PVM} on Clusters", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "166--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310166.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310166.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Pedroso:2001:WLE, author = "Hern{\^a}ni Pedroso and Jo{\~a}o Gabriel Silva", title = "The {WMPI} Library Evolution: Experience with {MPI} Development for {Windows} Environments", journal = j-LECT-NOTES-COMP-SCI, volume = "1900", pages = "1157--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L =
"0302-9743", bibdate = "Sat Feb 2 13:02:44 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1900.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1900/19001157.htm; http://link.springer-ny.com/link/service/series/0558/papers/1900/19001157.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @TechReport{Petcu:2001:WMM, author = "Dana Petcu", title = "Working with Multiple {Maple} Kernels Connected by {Distributed Maple} or {PVMaple}", type = "Technical report", institution = "Western University of Timisoara", address = "Timisoara, Romania", month = mar, year = "2001", bibdate = "Wed Dec 17 18:07:37 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.risc.uni-linz.ac.at/software/distmaple/index_1.html", URL = "http://www.risc.uni-linz.ac.at/software/distmaple/misc/petcu2001.ps.gz", acknowledgement = ack-nhfb, keywords = "Distributed Maple; PVMaple", } @Article{Plagianakos:2001:LCP, author = "V. P. Plagianakos and N. K. Nousis and M. N. Vrahatis", title = "Locating and computing in parallel all the simple roots of special functions using {PVM}", journal = j-J-COMPUT-APPL-MATH, volume = "133", number = "1--2", pages = "545--554", day = "1", month = aug, year = "2001", CODEN = "JCAMDI", DOI = "https://doi.org/10.1016/S0377-0427(00)00675-0", ISSN = "0377-0427 (print), 1879-1778 (electronic)", ISSN-L = "0377-0427", bibdate = "Sat Feb 25 12:45:19 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/elefunt.bib; https://www.math.utah.edu/pub/tex/bib/jcomputapplmath2000.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0377042700006750", acknowledgement = ack-nhfb, fjournal = "Journal of Computational and Applied Mathematics", journal-URL = "http://www.sciencedirect.com/science/journal/03770427", } @Article{Plunkett:2001:AMD, author = "Craig L.
Plunkett and Alfred G. Striz and J. Sobieszczanski-Sobieski", title = "Application of {MPI} in Displacement Based Multilevel Structural Optimization", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "335--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310335.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310335.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Pringle:2001:TPF, author = "Gavin J. Pringle and Steven P. Booth and Hugh M. P. Couchman and Frazer R. Pearce and Alan D. Simpson", title = "Towards a Portable, Fast Parallel {AP$^3$M-SPH} Code: {HYDRA\_MPI}", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "360--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310360.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310360.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @InProceedings{Prost:2001:MIG, author = "Jean-Pierre Prost and Richard Treumann and Richard Hedges and Bin Jia and Alice Koniges", title = "{MPI-IO\slash GPFS}, an Optimized Implementation of {MPI-IO} on top of {GPFS}", crossref = "ACM:2001:SHP", pages = "??--??", year = "2001", bibdate = "Sat Feb 10 14:28:55 2001", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sc2001.org/papers/pap.pap186.pdf", acknowledgement = 
ack-nhfb, pagecount = "15", } @Article{Prost:2001:THP, author = "Jean-Pierre Prost and Richard Treumann and Richard Hedges and Alice Koniges and Alison White", title = "Towards a High-Performance Implementation of {MPI--IO} on Top of {GPFS}", journal = j-LECT-NOTES-COMP-SCI, volume = "1900", pages = "1253--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Feb 2 13:02:44 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1900.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1900/19001253.htm; http://link.springer-ny.com/link/service/series/0558/papers/1900/19001253.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Protopopov:2001:MMP, author = "Boris V. Protopopov and Anthony Skjellum", title = "A Multithreaded {Message Passing Interface (MPI)} Architecture: Performance and Program Issues", journal = j-J-PAR-DIST-COMP, volume = "61", number = "4", pages = "449--466", day = "1", month = apr, year = "2001", CODEN = "JPDCER", DOI = "https://doi.org/10.1006/jpdc.2000.1674", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Fri Feb 22 15:30:36 MST 2002", bibsource = "http://www.idealibrary.com/servlet/useragent?func=showAllIssues&curIssueID=jpdc; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.idealibrary.com/links/doi/10.1006/jpdc.2000.1674; http://www.idealibrary.com/links/doi/10.1006/jpdc.2000.1674/pdf; http://www.idealibrary.com/links/doi/10.1006/jpdc.2000.1674/ref", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Article{Rabenseifner:2001:ECF, author = "Rolf Rabenseifner and Alice E. 
Koniges", title = "Effective Communication and File-{I/O} Bandwidth Benchmarks", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "24--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310024.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310024.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @TechReport{Rageb:2001:CEM, author = "Khaled Rageb and Wolfgang Rehm", title = "{CHEMPI}: efficient {MPI} for {VIA\slash SCI}", type = "{Preprint-Reihe des Chemnitzer}", number = "{SFB 393}", institution = "Technische Universit{\"a}t Chemnitz", address = "Chemnitz, Germany", pages = "12", year = "2001", bibdate = "Wed Aug 27 06:45:29 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Article{Reinefeld:2001:CDI, author = "Alexander Reinefeld", title = "Clusters for Data-Intensive Applications in the Grid", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "12--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310012.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310012.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Reussner:2001:APP, author = "Ralf Reussner and Gunnar Hunzelmann", title = "Achieving Performance Portability with {SKaMPI} for High-Performance {MPI} Programs", journal = 
j-LECT-NOTES-COMP-SCI, volume = "2074", pages = "841--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Feb 2 13:04:30 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2074.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2074/20740841.htm; http://link.springer-ny.com/link/service/series/0558/papers/2074/20740841.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @TechReport{Reussner:2001:SSK, author = "Ralf H. Reussner", title = "{SKaMPI}: the special {Karlsruher} {MPI}-benchmark: user manual", type = "{Interner Bericht}", number = "99,02", institution = "Fakult{\"a}t f{\"u}r Informatik, Universit{\"a}t Karlsruhe", address = "Karlsruhe, Germany", pages = "78", year = "2001", bibdate = "Wed Aug 27 06:47:26 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Article{Roig:2001:EMM, author = "Concepci{\'o} Roig and Ana Ripoll and Javier Borr{\'a}s and Emilio Luque", title = "Efficient Mapping for Message-Passing Applications Using the {TTIG} Model: a Case Study in Image Processing", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "370--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310370.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310370.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Roussos:2001:BMB, author = "George Roussos and B. J. C. 
Baxter", title = "Biharmonic Many Body Calculations for Fast Evaluation of Radial Basis Function Interpolants in Cluster Environments", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "288--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310288.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310288.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Rungsawang:2001:LCP, author = "A. Rungsawang and A. Laohakanniyom and M. Lertprasertkune", title = "Low-Cost Parallel Text Retrieval Using {PC}-Cluster", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "419--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310419.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310419.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Sahimi:2001:AAS, author = "Mohd Salleh Sahimi and Norma Alias and Elankovan Sundararajan", title = "The {AGEB} Algorithm for Solving the Heat Equation in Three Space Dimensions and Its Parallelization Using {PVM}", journal = j-LECT-NOTES-COMP-SCI, volume = "2073", pages = "918--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Feb 2 13:04:28 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2073.htm; 
https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2073/20730918.htm; http://link.springer-ny.com/link/service/series/0558/papers/2073/20730918.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Sato:2001:CEO, author = "Mitsuhisa Sato and Hiroshi Harada and Atsushi Hasegawa and Yutaka Ishikawa", title = "Cluster-enabled {OpenMP}: An {OpenMP} compiler for the {SCASH} software distributed shared memory system", journal = j-SCI-PROG, volume = "9", number = "2--3", pages = "123--130", month = "Spring--Summer", year = "2001", CODEN = "SCIPEV", ISSN = "1058-9244 (print), 1875-919X (electronic)", ISSN-L = "1058-9244", bibdate = "Thu Mar 28 12:27:27 MST 2002", bibsource = "Compendex database; http://www.iospress.nl/site/html/10589244.html; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sciprogram.bib; OCLC Article1st database", URL = "http://iospress.metapress.com/app/home/contribution.asp%3Fwasp=7pab6qgbaf8vxg991rwy%26referrer=parent%26backto=issue%2C6%2C11%3Bjournal%2C1%2C9%3Blinkingpublicationresults%2C1%2C1", acknowledgement = ack-nhfb, fjournal = "Scientific Programming", journal-URL = "http://iospress.metapress.com/content/1058-9244", } @Article{Sato:2001:OGR, author = "Mitsuhisa Sato and Motonari Hirano and Yoshio Tanaka and Satoshi Sekiguchi", title = "{OmniRPC}: a {Grid} {RPC} Facility for Cluster and Global Computing in {OpenMP}", journal = j-LECT-NOTES-COMP-SCI, volume = "2104", pages = "130--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Feb 2 13:05:04 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2104.htm; https://www.math.utah.edu/pub/tex/bib/lncs2001b.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2104/21040130.htm; 
http://link.springer-ny.com/link/service/series/0558/papers/2104/21040130.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Satoh:2001:COT, author = "Shigehisa Satoh and Kazuhiro Kusano and Mitsuhisa Sato", title = "Compiler optimization techniques for {OpenMP} programs", journal = j-SCI-PROG, volume = "9", number = "2--3", pages = "131--142", month = "Spring--Summer", year = "2001", CODEN = "SCIPEV", ISSN = "1058-9244 (print), 1875-919X (electronic)", ISSN-L = "1058-9244", bibdate = "Thu Mar 28 12:27:27 MST 2002", bibsource = "Compendex database; http://www.iospress.nl/site/html/10589244.html; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sciprogram.bib; OCLC Article1st database", URL = "http://iospress.metapress.com/app/home/contribution.asp%3Fwasp=7pab6qgbaf8vxg991rwy%26referrer=parent%26backto=issue%2C7%2C11%3Bjournal%2C1%2C9%3Blinkingpublicationresults%2C1%2C1", acknowledgement = ack-nhfb, fjournal = "Scientific Programming", journal-URL = "http://iospress.metapress.com/content/1058-9244", } @Article{Schevtschenko:2001:PAS, author = "I. V. 
Schevtschenko", title = "A Parallel {ADI} and Steepest Descent Methods", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "265--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310265.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310265.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Shan:2001:CMS, author = "Hongzhang Shan and Jaswinder Pal Singh", title = "A Comparison of {MPI}, {SHMEM} and Cache-Coherent Shared Address Space Programming Models on a Tightly-Coupled Multiprocessors", journal = j-INT-J-PARALLEL-PROG, volume = "29", number = "3", pages = "283--318", month = jun, year = "2001", CODEN = "IJPPE5", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Wed Feb 20 09:55:15 MST 2002", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://ipsapp009.lwwonline.com/content/getfile/4773/21/3/abstract.htm; http://ipsapp009.lwwonline.com/content/getfile/4773/21/3/fulltext.pdf", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", } @Article{Skjellum:2001:OOA, author = "Anthony Skjellum and Diane G. Wooley and Ziyang Lu and Michael Wolf and Purushotham V. Bangalore and Andrew Lumsdaine and Jeffrey M. 
Squyres and Brian McCandless", title = "Object-oriented analysis and design of the {Message Passing Interface}", journal = j-CCPE, volume = "13", number = "4", pages = "245--292", day = "10", month = apr, year = "2001", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.556", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Wed Jul 25 10:55:46 MDT 2001", bibsource = "http://www.interscience.wiley.com/jpages/1532-0626; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www3.interscience.wiley.com/journalfinder.html", URL = "http://www3.interscience.wiley.com/cgi-bin/abstract/78502300/START; http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=78502300&PLACEBO=IE.pdf", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Prac\-tice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", } @Article{Smith:2001:DMM, author = "Lorna Smith and Mark Bull", title = "Development of mixed mode {MPI\slash OpenMP} applications", journal = j-SCI-PROG, volume = "9", number = "2--3", pages = "83--98", month = "Spring--Summer", year = "2001", CODEN = "SCIPEV", ISSN = "1058-9244 (print), 1875-919X (electronic)", ISSN-L = "1058-9244", bibdate = "Thu Mar 28 12:27:27 MST 2002", bibsource = "Compendex database; http://www.iospress.nl/site/html/10589244.html; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sciprogram.bib; OCLC Article1st database", URL = "http://iospress.metapress.com/app/home/contribution.asp%3Fwasp=7pab6qgbaf8vxg991rwy%26referrer=parent%26backto=issue%2C3%2C11%3Bjournal%2C1%2C9%3Blinkingpublicationresults%2C1%2C1", acknowledgement = ack-nhfb, fjournal = "Scientific Programming", journal-URL = "http://iospress.metapress.com/content/1058-9244", } @Article{Solsona:2001:IEI, author = "Francesc Solsona and Francesc Gin{\'e} and Porfidio Hern{\'a}ndez and Emilio Luque", title = "Implementing Explicit and Implicit Coscheduling in a {PVM} Environment 
(Research Note)", journal = j-LECT-NOTES-COMP-SCI, volume = "1900", pages = "1165--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Feb 2 13:02:44 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1900.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1900/19001165.htm; http://link.springer-ny.com/link/service/series/0558/papers/1900/19001165.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{SousaPinto:2001:PEI, author = "Jorge {Sousa Pinto}", title = "Parallel Evaluation of Interaction Nets with {MPINE}", journal = j-LECT-NOTES-COMP-SCI, volume = "2051", pages = "353--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Feb 2 13:04:07 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2051.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2051/20510353.htm; http://link.springer-ny.com/link/service/series/0558/papers/2051/20510353.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Sunderam:2001:CAP, author = "Vaidy Sunderam and Zsolt N{\'e}meth", title = "A Comparative Analysis of {PVM\slash MPI} and Computational {Grids}", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "14--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310014.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310014.pdf", 
acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Suppi:2001:PCS, author = "Remo Suppi and Fernando Cores and Emilio Luque", title = "{PDES}: a Case Study Using the Switch Time Warp", journal = j-LECT-NOTES-COMP-SCI, volume = "2131", pages = "327--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310327.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310327.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Swann:2001:SPC, author = "Christopher A. Swann", title = "Software for parallel computing: the {LAM} implementation of {MPI}", journal = j-J-APPL-ECONOMETRICS, volume = "16", number = "2", pages = "185--194", month = mar # "--" # apr, year = "2001", CODEN = "JAECET", DOI = "https://doi.org/10.1002/jae.595", ISSN = "0883-7252 (print), 1099-1255 (electronic)", ISSN-L = "0883-7252", bibdate = "Sat Mar 9 10:20:01 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jappleconometrics.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Journal of Applied Econometrics", journal-URL = "https://onlinelibrary.wiley.com/journal/10991255; https://www.jstor.org/journal/japplecon", onlinedate = "23 April 2001", } @Article{Takeda:2001:AME, author = "K. Takeda and N. K. Allsopp and J. C. Hardwick and P. C. Macey and D. A. Nicole and S. J. Cox and D. J. 
Lancaster", title = "An Assessment of {MPI} Environments for {Windows NT}", journal = j-J-SUPERCOMPUTING, volume = "19", number = "3", pages = "315--323", month = jul, year = "2001", CODEN = "JOSUED", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Wed Jul 25 09:05:33 MDT 2001", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.wkap.nl/issuetoc.htm/0920-8542+19+3+2001", URL = "http://www.wkap.nl/oasis.htm/338207", abstract = "In this paper we evaluate the MPI environments currently available for Windows NT on the Intel IA32 and Compaq DEC Alpha architectures. We present benchmark results for low-level communication and for the NAS Parallel Benchmarks to allow comparison with other systems, but our primary interest is determining real application performance and robustness in production cluster environments. For this we use PAFEC-FE, a large FORTRAN code for finite-element analysis. We present results from three MPI implementations, two architectures, and three networking technologies (10 and 100 Mbit/s Ethernet and 1 Gbit/s Myrinet).", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", }

@Article{Tinetti:2001:HNW,
  author =       "Fernando Tinetti and Antonio Quijano and Armando
                 {De Giusti} and Emilio Luque",
  title =        "Heterogeneous Networks of Workstations and the
                 Parallel Matrix Multiplication",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "2131",
  pages =        "296--??",
  year =         "2001",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Fri Feb 1 08:13:55 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310296.htm;
                 http://link.springer-ny.com/link/service/series/0558/papers/2131/21310296.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}

@Article{Tourancheau:2001:SMN,
  author =       "Bernard Tourancheau and Roland Westrelin",
  title =        "Support for {MPI} at the Network Interface Level",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "2131",
  pages =        "52--??",
  year =         "2001",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Fri Feb 1 08:13:55 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310052.htm;
                 http://link.springer-ny.com/link/service/series/0558/papers/2131/21310052.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}

%%% Each initial is separated from the surname by a space ("M. {\v{S}}terk")
%%% so that BibTeX splits the given name from the family name.
@Article{Trobec:2001:IEM,
  author =       "R. Trobec and M. {\v{S}}terk and M. Praprotnik and D.
                 Jane{\v{z}}i{\v{c}}",
  title =        "Implementation and evaluation of {MPI}-based parallel
                 {MD} program",
  journal =      j-IJQC,
  volume =       "84",
  number =       "1",
  pages =        "23--31",
  month =        "????",
  year =         "2001",
  CODEN =        "IJQCB2",
  DOI =          "https://doi.org/10.1002/qua.1303",
  ISSN =         "0020-7608 (print), 1097-461X (electronic)",
  ISSN-L =       "0020-7608",
  bibdate =      "Wed Jul 25 09:32:26 MDT 2001",
  bibsource =    "http://www.interscience.wiley.com/jpages/0020-7608;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www3.interscience.wiley.com/journalfinder.html",
  URL =          "http://www3.interscience.wiley.com/cgi-bin/abstract/84002438/START;
                 http://www3.interscience.wiley.com/cgi-bin/fulltext/84002438/FILE?TPL=ftx_start;
                 http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=84002438&PLACEBO=IE.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of Quantum Chemistry",
  journal-URL =  "http://www.interscience.wiley.com/jpages/0020-7608/",
}

@Article{Uthayopas:2001:FSR, author = "Putchong Uthayopas and Sugree Phatanapherom", title = "Fast and Scalable Real-Time Monitoring System for {Beowulf} Clusters", journal = j-LECT-NOTES-COMP-SCI, volume =
"2131", pages = "201--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Fri Feb 1 08:13:55 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310201.htm; http://link.springer-ny.com/link/service/series/0558/papers/2131/21310201.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Walker:2001:DLB, author = "Reginald L. Walker", title = "Dynamic Load Balancing Model: Preliminary Results for Parallel Pseudo-search Engine Indexers\slash Crawler Mechanisms Using {MPI} and Genetic Programming", journal = j-LECT-NOTES-COMP-SCI, volume = "1981", pages = "61--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Feb 2 13:03:02 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1981.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1981/19810061.htm; http://link.springer-ny.com/link/service/series/0558/papers/1981/19810061.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Walker:2001:SEC, author = "Reginald L. 
Walker", title = "Search engine case study: searching the {Web} using genetic programming and {MPI}", journal = j-PARALLEL-COMPUTING, volume = "27", number = "1--2", pages = "71--89", month = jan, year = "2001", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Wed Jul 18 06:31:14 MDT 2001", bibsource = "http://www.elsevier.com/locate/issn/01678191; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.elsevier.nl/gej-ng/10/35/21/47/25/25/abstract.html; http://www.elsevier.nl/gej-ng/10/35/21/47/25/25/article.pdf", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", }

@Article{Wismuller:2001:UMT,
  author =       "Roland Wism{\"u}ller",
  title =        "Using Monitoring Techniques to Support the Cooperation
                 of Software Components",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "2131",
  pages =        "183--??",
  year =         "2001",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Fri Feb 1 08:13:55 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310183.htm;
                 http://link.springer-ny.com/link/service/series/0558/papers/2131/21310183.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}

@Article{Wolf:2001:APA,
  author =       "Felix Wolf and Bernd Mohr",
  title =        "Automatic Performance Analysis of {MPI} Applications
                 Based on Event Traces",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1900",
  pages =        "123--??",
  year =         "2001",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Sat Feb 2 13:02:44 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t1900.htm;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer-ny.com/link/service/series/0558/bibs/1900/19000123.htm;
                 http://link.springer-ny.com/link/service/series/0558/papers/1900/19000123.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}

@Article{Wu:2001:PCS,
  author =       "Guang Jun Wu and Robert Roy",
  title =        "Parallelization of Characteristics Solvers for {$3$D}
                 Neutron Transport",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "2131",
  pages =        "344--??",
  year =         "2001",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Fri Feb 1 08:13:55 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310344.htm;
                 http://link.springer-ny.com/link/service/series/0558/papers/2131/21310344.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}

%%% The third author's surname is Garc{\'\i}a: an acute accent (\') over a
%%% dotless i (\i), not a dieresis (\").
@Article{Yero:2001:JOO,
  author =       "Eduardo J. H. Yero and Marco A. A. Henriques and
                 Javier R. Garc{\'\i}a and Alina C. Leyva",
  title =        "{JOINT}: An Object Oriented Message Passing Interface
                 for Parallel Programming in {Java}",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "2110",
  pages =        "637--??",
  year =         "2001",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Sat Feb 2 13:05:11 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t2110.htm;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer-ny.com/link/service/series/0558/bibs/2110/21100637.htm;
                 http://link.springer-ny.com/link/service/series/0558/papers/2110/21100637.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}

@Article{Zhang:2001:PPV, author = "Xin Zhang and Lingli Ding and Elke A.
Rundensteiner", title = "{PVM}: {Parallel View Maintenance} under Concurrent Data Updates of Distributed Sources", journal = j-LECT-NOTES-COMP-SCI, volume = "2114", pages = "230--??", year = "2001", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Feb 2 13:05:16 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2114.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2114/21140230.htm; http://link.springer-ny.com/link/service/series/0558/papers/2114/21140230.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Zoltani:2001:EPO, author = "Csaba K. Zoltani and Punyam Satya-narayana and Dixie Hisley", title = "Evaluating Performance of {OpenMP} and {MPI} on the {SGI Origin 2000} with Benchmarks of Realistic Problem Sizes", journal = j-PARALLEL-DIST-COMP-PRACT, volume = "4", number = "4", pages = "??--??", month = dec, year = "2001", CODEN = "????", ISSN = "1097-2803", bibdate = "Thu Sep 2 12:08:56 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.scpe.org/content/4/4.toc", acknowledgement = ack-nhfb, fjournal = "PDCP: Parallel and Distributed Computing Practices", } @Article{Acacio:2002:MDM, author = "M. Acacio and O. C{\'a}novas and J. M. Garc{\'\i}a and P. E. 
L{\'o}pez-de-Teruel", title = "{MPI-Delphi}: an {MPI} implementation for visual programming environments and heterogeneous computing", journal = j-FUT-GEN-COMP-SYS, volume = "18", number = "3", pages = "317--333", month = jan, year = "2002", CODEN = "FGSEVI", ISSN = "0167-739X (print), 1872-7115 (electronic)", ISSN-L = "0167-739X", bibdate = "Wed Feb 27 12:41:22 MST 2002", bibsource = "http://www.elsevier.com/locate/issn/0167739X; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.elsevier.com/gej-ng/10/19/19/60/32/28/abstract.html", acknowledgement = ack-nhfb, fjournal = "Future Generation Computer Systems", journal-URL = "http://www.sciencedirect.com/science/journal/0167739X", } @Article{Bane:2002:EOA, author = "M. K. Bane and G. D. Riley", title = "Extended Overhead Analysis for {OpenMP} (Research Note)", journal = j-LECT-NOTES-COMP-SCI, volume = "2400", pages = "162--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Sep 10 19:10:14 MDT 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2400.htm; https://www.math.utah.edu/pub/tex/bib/lncs2002c.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2400/24000162.htm; http://link.springer-ny.com/link/service/series/0558/papers/2400/24000162.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Basumallik:2002:TOE, author = "Ayon Basumallik and Seung-Jai Min and Rudolf Eigenmann", title = "Towards {OpenMP} Execution on Software Distributed Shared Memory Systems", journal = j-LECT-NOTES-COMP-SCI, volume = "2327", pages = "457--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Sep 10 19:09:32 MDT 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2327.htm; 
https://www.math.utah.edu/pub/tex/bib/lncs2002a.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2327/23270457.htm; http://link.springer-ny.com/link/service/series/0558/papers/2327/23270457.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Bekas:2002:PCP, author = "Constantine Bekas and Efrosini Kokiopoulou and Efstratios Gallopoulos and Valeria Simoncini", title = "Parallel Computation of Pseudospectra Using Transfer Functions on a {MATLAB-MPI} Cluster Platform", journal = j-LECT-NOTES-COMP-SCI, volume = "2474", pages = "199--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Nov 30 20:57:35 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.de/link/service/series/0558/bibs/2474/24740199.htm; http://link.springer.de/link/service/series/0558/papers/2474/24740199.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Bisseling:2002:FMF, author = "Georg Bi{\ss}eling and Hans-Christian Hoppe and Alexander Supalov and Pierre Lagier and Jean Latour", title = "{Fujitsu MPI-2}: Fast Locally, Reaching Globally", journal = j-LECT-NOTES-COMP-SCI, volume = "2474", pages = "401--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Nov 30 20:57:35 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.de/link/service/series/0558/bibs/2474/24740401.htm; http://link.springer.de/link/service/series/0558/papers/2474/24740401.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Blanco:2002:PMA, author = "V. Blanco and L. 
Garc{\'\i}a and J. A. Gonz{\'a}lez and C. Rodr{\'\i}guez and G. Rodr{\'\i}guez", title = "A Performance Model for the Analysis of {OpenMP} Programs", journal = j-PARALLEL-DIST-COMP-PRACT, volume = "5", number = "2", pages = "139--151", month = jun, year = "2002", CODEN = "????", ISSN = "1097-2803", bibdate = "Thu Sep 2 12:08:56 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.scpe.org/content/5/2.toc", acknowledgement = ack-nhfb, fjournal = "PDCP: Parallel and Distributed Computing Practices", } @InProceedings{Bosilca:2002:MVT, author = "George Bosilca and Aurelien Bouteiller and Franck Cappello and Samir Djilali and Gilles Fedak and Cecile Germain and Thomas Herault and Pierre Lemarinier and Oleg Lodygensky and Frederic Magniette and Vincent Neri and Anton Selikhov", title = "{MPICH-V}: Toward a Scalable Fault Tolerant {MPI} for Volatile Nodes", crossref = "IEEE:2002:STI", pages = "??--??", year = "2002", bibdate = "Wed Nov 26 07:34:20 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sc-2002.org/paperpdfs/pap.pap298.pdf", abstract = "Global Computing platforms, large scale clusters and future TeraGRID systems gather thousands of nodes for computing parallel scientific applications. At this scale, node failures or disconnections are frequent events. This Volatility reduces the MTBF of the whole system in the range of hours or minutes. We present MPICH-V, an automatic Volatility tolerant MPI environment based on uncoordinated checkpoint/ rollback and distributed message logging. MPICH-V architecture relies on Channel Memories, Checkpoint servers and theoretically proven protocols to execute existing or new, SPMD and Master-Worker MPI applications on volatile nodes. To evaluate its capabilities, we run MPICH-V within a framework for which the number of nodes, Channels Memories and Checkpoint Servers can be completely configured as well as the node Volatility. 
We present a detailed performance evaluation of every component of MPICH-V and its global performance for non-trivial parallel applications. Experimental results demonstrate good scalability and high tolerance to node volatility.", acknowledgement = ack-nhfb, } @Article{Brightwell:2002:DIM, author = "Ron Brightwell and Arthur B. Maccabe and Rolf Riesen", title = "Design and Implementation of {MPI} on {Portals 3.0}", journal = j-LECT-NOTES-COMP-SCI, volume = "2474", pages = "331--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Nov 30 20:57:35 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.de/link/service/series/0558/bibs/2474/24740331.htm; http://link.springer.de/link/service/series/0558/papers/2474/24740331.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Brightwell:2002:RMR, author = "Ron Brightwell", title = "Ready-Mode Receive: An Optimized Receive Function for {MPI}", journal = j-LECT-NOTES-COMP-SCI, volume = "2474", pages = "385--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Nov 30 20:57:35 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.de/link/service/series/0558/bibs/2474/24740385.htm; http://link.springer.de/link/service/series/0558/papers/2474/24740385.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Calderon:2002:IMI, author = "Alejandro Calder{\'o}n and F{\'e}lix Garc{\'\i}a and Jes{\'u}s Carretero and Jose M. 
P{\'e}rez and Javier Fern{\'a}ndez", title = "An Implementation of {MPI-IO} on Expand: a Parallel File System Based on {NFS} Servers", journal = j-LECT-NOTES-COMP-SCI, volume = "2474", pages = "306--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Nov 30 20:57:35 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.de/link/service/series/0558/bibs/2474/24740306.htm; http://link.springer.de/link/service/series/0558/papers/2474/24740306.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Chapman:2002:APU, author = "B. Chapman and F. Bregier and A. Patil and A. Prabhakar", title = "Achieving performance under {OpenMP} on {ccNUMA} and software distributed shared memory systems", journal = j-CCPE, volume = "14", number = "8--9", pages = "713--739", month = jul # "\slash " # aug, year = "2002", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.646", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Sat Nov 9 12:24:19 MST 2002", bibsource = "http://www.interscience.wiley.com/jpages/1532-0626; https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www3.interscience.wiley.com/journalfinder.html", URL = "http://www3.interscience.wiley.com/cgi-bin/abstract/95016122/START; http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=95016122{\&}PLACEBO=IE.pdf", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Prac\-tice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", } @Article{Chapman:2002:PAD, author = "Barbara Chapman", title = "Parallel Application Development with the Hybrid {MPI $+$ OpenMP} Programming Model", journal = j-LECT-NOTES-COMP-SCI, volume = "2474", pages = "13--??", year = "2002", CODEN = 
"LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Nov 30 20:57:35 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm; https://www.math.utah.edu/pub/tex/bib/lncs2002e.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.de/link/service/series/0558/bibs/2474/24740013.htm; http://link.springer.de/link/service/series/0558/papers/2474/24740013.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Cotronis:2002:MMP, author = "Yiannis Cotronis and Zacharias Tsiatsoulis", title = "Modular {MPI} and {PVM} Components", journal = j-LECT-NOTES-COMP-SCI, volume = "2474", pages = "252--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Nov 30 20:57:35 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.de/link/service/series/0558/bibs/2474/24740252.htm; http://link.springer.de/link/service/series/0558/papers/2474/24740252.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Czarnul:2002:DTI, author = "Pawel Czarnul", title = "Development and Tuning of Irregular Divide-and-Conquer Applications in {DAMPVM\slash DAC}", journal = j-LECT-NOTES-COMP-SCI, volume = "2474", pages = "208--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Nov 30 20:57:35 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.de/link/service/series/0558/bibs/2474/24740208.htm; http://link.springer.de/link/service/series/0558/papers/2474/24740208.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } 
@Article{DeRose:2002:CCG, author = "L. DeRose and F. Wolf", title = "{CATCH} --- a Call-Graph Based Automatic Tool for Capture of Hardware Performance Metrics for {MPI} and {OpenMP} Applications", journal = j-LECT-NOTES-COMP-SCI, volume = "2400", pages = "167--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Sep 10 19:10:14 MDT 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2400.htm; https://www.math.utah.edu/pub/tex/bib/lncs2002c.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2400/24000167.htm; http://link.springer-ny.com/link/service/series/0558/papers/2400/24000167.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @InProceedings{Ding:2002:MOP, author = "Yun He and Chris H. Q. Ding", keywords = "multidimensional arrays; index reshuffle; vacancy tracking cycles; global exchange; dynamical remapping; MPI; OpenMP; hybrid MPI/OpenMP; SMP cluster", title = "{MPI} and {OpenMP} Paradigms on Cluster of {SMP} Architectures", crossref = "IEEE:2002:STI", pages = "??--??", year = "2002", bibdate = "Wed Nov 26 07:34:20 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/supercomputing2002.bib", URL = "http://www.sc-2002.org/paperpdfs/pap.pap325.pdf", abstract = "We investigate remapping multi-dimensional arrays on cluster of SMP architectures under OpenMP, MPI, and hybrid paradigms. Traditional method of array transpose needs an auxiliary array of the same size and a copy back stage. We recently developed an in-place method using vacancy tracking cycles. The vacancy tracking algorithm outperforms the traditional 2-array method as demonstrated by extensive comparisons.
The independence of vacancy tracking cycles allows efficient parallelization of the in-place method on SMP architectures at node level. Performance of multi-threaded parallelism using OpenMP are tested with different scheduling methods and different number of threads. The vacancy tracking method is parallelized using several parallel paradigms. At node level, pure OpenMP outperforms pure MPI by a factor of 2.76. Across entire cluster of SMP nodes, the hybrid MPI/OpenMP implementation outperforms pure MPI by a factor of 4.44, demonstrating the validity of the parallel paradigm of mixing MPI with OpenMP.", acknowledgement = ack-nhfb, } @Article{DiSerio:2002:ENN, author = "Angela {Di Serio} and Mar{\'\i}a B. Ib{\'a}{\~n}ez", title = "Evaluation of a Nearest-Neighbor Load Balancing Strategy for Parallel Molecular Simulations in {MPI} Environment", journal = j-LECT-NOTES-COMP-SCI, volume = "2474", pages = "226--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Nov 30 20:57:35 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.de/link/service/series/0558/bibs/2474/24740226.htm; http://link.springer.de/link/service/series/0558/papers/2474/24740226.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Dow:2002:CMA, author = "Chyi-Ren Dow and Jong-Shin Chen and Min-Chang Hsieh", title = "Checkpointing {MPI} applications on symmetric multi-processor machines using {SMPCkpt}", journal = j-J-SYST-SOFTW, volume = "63", number = "2", pages = "137--150", day = "15", month = aug, year = "2002", CODEN = "JSSODM", ISSN = "0164-1212 (print), 1873-1228 (electronic)", ISSN-L = "0164-1212", bibdate = "Sat Oct 25 07:14:09 MDT 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "The Journal of systems and 
software", journal-URL = "http://www.sciencedirect.com/science/journal/01641212", } @InProceedings{El-Ghazawi:2002:UPP, author = "Tarek El-Ghazawi and Fran{\c{c}}ois Cantonnet", title = "{UPC} Performance and Potential: a {NPB} Experimental Study", crossref = "IEEE:2002:STI", pages = "??--??", year = "2002", bibdate = "Wed Nov 26 07:34:20 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sc-2002.org/paperpdfs/pap.pap316.pdf", abstract = "UPC, or Unified Parallel C, is a parallel extension of ANSI C. UPC follows a distributed shared memory programming model aimed at leveraging the ease of programming of the shared memory paradigm, while enabling the exploitation of data locality. UPC incorporates constructs that allow placing data near the threads that manipulate them to minimize remote accesses. This paper gives an overview of the concepts and features of UPC and establishes, through extensive performance measurements of NPB workloads, the viability of the UPC programming language compared to the other popular paradigms. Further, through performance measurements we identify the challenges, the remaining steps and the priorities for UPC. It will be shown that with proper hand tuning and optimized collective operations libraries, UPC performance will be comparable to that of MPI.
Furthermore, by incorporating such improvements into automatic compiler optimizations, UPC will compare quite favorably to message passing in ease of programming.", acknowledgement = ack-nhfb, keywords = "NPB (NAS Parallel Benchmark)", } @Article{Espenica:2002:PPA, author = "Roberto Espenica and Pedro Medeiros", title = "Porting {PVM} to the {VIA} Architecture Using a Fast Communication Library", journal = j-LECT-NOTES-COMP-SCI, volume = "2474", pages = "341--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Nov 30 20:57:35 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.de/link/service/series/0558/bibs/2474/24740341.htm; http://link.springer.de/link/service/series/0558/papers/2474/24740341.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @TechReport{Fagg:2002:FTM, author = "Graham E. Fagg and Antonin Bukovsky and Sathish Vadhiyar and Jack J. Dongarra", title = "Fault Tolerant {MPI} for the {HARNESS MetaComputing} System", type = "Technical report", number = "????", institution = inst-UTK, address = inst-UTK:adr, pages = "14", year = "2002", bibdate = "Tue Jan 13 18:41:26 2004", bibsource = "https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.netlib.org/netlib/utk/people/JackDongarra/PAPERS/ft-mpi-iccs-gef.pdf", acknowledgement = ack-nhfb, } @TechReport{Fagg:2002:HFTa, author = "Graham E. Fagg and Jack J.
Dongarra", title = "{HARNESS} Fault Tolerant {MPI} Design, Usage and Performance Issues", type = "Technical report", number = "????", institution = inst-UTK, address = inst-UTK:adr, year = "2002", bibdate = "Tue Jan 13 18:42:49 2004", bibsource = "https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.netlib.org/netlib/utk/people/JackDongarra/PAPERS/ft-mpi-fgcs-grid-se.pdf", acknowledgement = ack-nhfb, } @Article{Fagg:2002:HFTb, author = "Graham E. Fagg and Jack J. Dongarra", title = "{HARNESS} fault tolerant {MPI} design, usage and performance issues", journal = j-FUT-GEN-COMP-SYS, volume = "18", number = "8", pages = "1127--1142", month = oct, year = "2002", CODEN = "FGSEVI", ISSN = "0167-739X (print), 1872-7115 (electronic)", ISSN-L = "0167-739X", bibdate = "Sat Jan 10 10:03:29 MST 2004", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Future Generation Computer Systems", journal-URL = "http://www.sciencedirect.com/science/journal/0167739X", } @Article{Field:2002:OSR, author = "A. J. Field and P. H. J. Kelly and T. L. Hansen", title = "Optimising Shared Reduction Variables in {MPI} Programs", journal = j-LECT-NOTES-COMP-SCI, volume = "2400", pages = "630--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Sep 10 19:10:14 MDT 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2400.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2400/24000630.htm; http://link.springer-ny.com/link/service/series/0558/papers/2400/24000630.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Book{Garg:2002:TOA, author = "Rajat P. 
Garg and Ilya Sharapov", title = "Techniques for optimizing applications: high performance computing", publisher = pub-SUN-MICROSYSTEMS-PRESS, address = pub-SUN-MICROSYSTEMS-PRESS:adr, pages = "xliii + 616", year = "2002", ISBN = "0-13-093476-3", ISBN-13 = "978-0-13-093476-5", LCCN = "QA76.88 .G37 2002", bibdate = "Fri Apr 11 08:26:42 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/unix.bib; http://www.sun.com/blueprints/", series = "Sun BluePrints Program", URL = "http://www.sun.com/books/catalog/garg.html/index.html; http://www.sun.com/solutions/blueprints/tools/", acknowledgement = ack-nhfb, annote = "From the Web site: The \verb=HPC_code_examples.tar.Z= tar-file contains the source code, makefiles, and shell scripts required to compile, link, and run the example programs discussed in the book.", keywords = "Forte Developer; MPI; OpenMP; Sun ClusterTools; Sun Solaris", } @Article{Gine:2002:ALT, author = "Francesc Gin{\'e} and Francesc Solsona and Porfidio Hern{\'a}ndez and Emilio Luque", title = "Adjusting the Lengths of Time Slices when Scheduling {PVM} Jobs with High Memory Requirements", journal = j-LECT-NOTES-COMP-SCI, volume = "2474", pages = "156--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Nov 30 20:57:35 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.de/link/service/series/0558/bibs/2474/24740156.htm; http://link.springer.de/link/service/series/0558/papers/2474/24740156.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Goedecker:2002:OPF, author = "Stefan Goedecker", title = "Optimization and parallelization of a force field for silicon using {OpenMP}", journal = j-COMP-PHYS-COMM, volume = "148", 
number = "1", pages = "124--135", day = "1", month = oct, year = "2002", CODEN = "CPHCBZ", DOI = "https://doi.org/10.1016/S0010-4655(02)00466-6", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Mon Feb 13 23:41:24 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2000.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465502004666", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Gonzalez:2002:DLP, author = "Marc Gonz{\'a}lez and Eduard Ayguad{\'e} and Xavier Martorell and Jes{\'u}s Labarta and Phu V. Luong", title = "Dual-Level Parallelism Exploitation with {OpenMP} in Coastal Ocean Circulation Modeling", journal = j-LECT-NOTES-COMP-SCI, volume = "2327", pages = "469--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Sep 10 19:09:32 MDT 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2327.htm; https://www.math.utah.edu/pub/tex/bib/lncs2002a.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2327/23270469.htm; http://link.springer-ny.com/link/service/series/0558/papers/2327/23270469.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Gropp:2002:BLC, author = "William Gropp", title = "Building Library Components that Can Use Any {MPI} Implementation", journal = j-LECT-NOTES-COMP-SCI, volume = "2474", pages = "280--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Nov 30 20:57:35 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = 
"http://link.springer.de/link/service/series/0558/bibs/2474/24740280.htm; http://link.springer.de/link/service/series/0558/papers/2474/24740280.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Gropp:2002:MG, author = "William Gropp and Ewing Lusk", title = "{MPI} on the {Grid}", journal = j-LECT-NOTES-COMP-SCI, volume = "2474", pages = "12--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Nov 30 20:57:35 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.de/link/service/series/0558/bibs/2474/24740012.htm; http://link.springer.de/link/service/series/0558/papers/2474/24740012.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Gropp:2002:MNS, author = "William Gropp", title = "{MPICH2}: a New Start for {MPI} Implementations", journal = j-LECT-NOTES-COMP-SCI, volume = "2474", pages = "7--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Nov 30 20:57:35 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.de/link/service/series/0558/bibs/2474/24740007.htm; http://link.springer.de/link/service/series/0558/papers/2474/24740007.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Hadjidoukas:2002:MOI, author = "Panagiotis E. Hadjidoukas and Eleftherios D. Polychronopoulos and Theodore S. 
Papatheodorou", title = "A Modular {OpenMP} Implementation for Clusters of Multiprocessors", journal = j-PARALLEL-DIST-COMP-PRACT, volume = "5", number = "2", pages = "153--168", month = jun, year = "2002", CODEN = "????", ISSN = "1097-2803", bibdate = "Thu Sep 2 12:08:56 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.scpe.org/content/5/2.toc", acknowledgement = ack-nhfb, fjournal = "PDCP: Parallel and Distributed Computing Practices", } @Article{He:2002:MOP, author = "Yun He and Chris H. Q. Ding", title = "{MPI} and {OpenMP} Paradigms on Cluster of {SMP} Architectures: The Vacancy Tracking Algorithm for Multi-Dimensional Array Transposition", journal = j-PARALLEL-DIST-COMP-PRACT, volume = "5", number = "2", pages = "117--128", month = jun, year = "2002", CODEN = "????", ISSN = "1097-2803", bibdate = "Thu Sep 2 12:08:56 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.scpe.org/content/5/2.toc", acknowledgement = ack-nhfb, fjournal = "PDCP: Parallel and Distributed Computing Practices", } @Article{Heikonen:2002:ILB, author = "Jussi Heikonen and Kalle Eerola", title = "Improving Load Balance in a Weather Code: Asynchronous Output in {HIRLAM} with {MPI}", journal = j-LECT-NOTES-COMP-SCI, volume = "2367", pages = "567--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Sep 10 19:09:54 MDT 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2367.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2367/23670567.htm; http://link.springer-ny.com/link/service/series/0558/papers/2367/23670567.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Huang:2002:DDD, author = "Wei Huang and Zhe Wang and Jie Ma", title = "Design of {DMPI} on {DAWNING-3000}", journal = j-LECT-NOTES-COMP-SCI, volume = "2474", 
pages = "314--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Nov 30 20:57:35 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.de/link/service/series/0558/bibs/2474/24740314.htm; http://link.springer.de/link/service/series/0558/papers/2474/24740314.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Huttunen:2002:MCC, author = "Pentti Huttunen and Jouni Ikonen and Jari Porras", title = "{MPIT} --- Communication\slash Computation Paradigm for Networks of {SMP} Workstations", journal = j-LECT-NOTES-COMP-SCI, volume = "2367", pages = "160--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Sep 12 08:36:35 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2367.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2367/23670160.htm; http://link.springer-ny.com/link/service/series/0558/papers/2367/23670160.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Isabel:2002:CMO, author = "Isabel Dorta and Coromoto Le{\'o}n and Casiano Rodr{\'\i}guez", title = "Comparing {MPI} and {OpenMP} implementations of the $0$-$1$ Knapsack Problem", journal = j-PARALLEL-DIST-COMP-PRACT, volume = "5", number = "2", pages = "129--137", month = jun, year = "2002", CODEN = "????", ISSN = "1097-2803", bibdate = "Thu Sep 2 12:08:56 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.scpe.org/content/5/2.toc", acknowledgement = ack-nhfb, fjournal = "PDCP: Parallel and Distributed Computing Practices", } @Article{Islam:2002:IAC, author = "Mohammad Towhidul Islam and Parimala Thulasiraman and Ruppa K.
Thulasiram", title = "Implementation of Ant Colony Optimization Algorithm for Mobile Ad hoc Network Applications: {OpenMP} Experiences", journal = j-PARALLEL-DIST-COMP-PRACT, volume = "5", number = "2", pages = "177--191", month = jun, year = "2002", CODEN = "????", ISSN = "1097-2803", bibdate = "Thu Sep 2 12:08:56 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.scpe.org/content/5/2.toc", acknowledgement = ack-nhfb, fjournal = "PDCP: Parallel and Distributed Computing Practices", } @Article{Iwama:2002:PLS, author = "Kazuo Iwama and Daisuke Kawai and Shuichi Miyazaki and Yasuo Okabe and Jun Umemoto", title = "Parallelizing local search for {CNF} satisfiability using vectorization and {PVM}", journal = j-ACM-J-EXP-ALGORITHMICS, volume = "7", pages = "2--2", month = "????", year = "2002", CODEN = "????", DOI = "https://doi.org/10.1145/944618.944620", ISSN = "1084-6654", bibdate = "Mon Oct 6 16:04:20 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "The purpose of this paper is to speed up the local search algorithm for the CNF Satisfiability problem. Our basic strategy is to run some 10$^5$ independent search paths simultaneously using PVM on a vector supercomputer VPP800, which consists of 40 vector processors. Using the above parallelization and vectorization together with some improvement of data structure, we obtained 600-times speedup in terms of the number of flips the local search can make per second, compared to the original GSAT by Selman and Kautz. We ran our parallel GSAT for benchmark instances and compared the running time with those of existing SAT programs. We could observe an apparent benefit of parallelization: Especially, we were able to solve two instances that have never been solved before this paper. We also tested parallel local search for the SAT encoding of the class scheduling problem. 
Again we were able to get almost the best answer in reasonable time.", acknowledgement = ack-nhfb, fjournal = "ACM Journal of Experimental Algorithmics", keywords = "algorithms; CNF Satisfiability; distributed computing; experimentation; local search algorithms; parallelization; PVM; vector supercomputer; vectorization", } @Article{Kabir:2002:DIS, author = "Yacine Kabir and A. Belhadj-Aissa", title = "Distributed Image Segmentation System by a Multi-agents Approach (Under {PVM} Environment)", journal = j-LECT-NOTES-COMP-SCI, volume = "2474", pages = "138--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Nov 30 20:57:35 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.de/link/service/series/0558/bibs/2474/24740138.htm; http://link.springer.de/link/service/series/0558/papers/2474/24740138.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @InProceedings{Karniadakis:2002:DLP, author = "Suchuan Dong and George Em. Karniadakis", title = "Dual-Level Parallelism for Deterministic and Stochastic {CFD} Problems", crossref = "IEEE:2002:STI", pages = "??--??", year = "2002", bibdate = "Wed Nov 26 07:34:20 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/supercomputing2002.bib", URL = "http://www.sc-2002.org/paperpdfs/pap.pap137.pdf", abstract = "A hybrid two-level parallelism using MPI/OpenMP is implemented in the general-purpose spectral/hp element CFD code NekTar to take advantage of the hierarchical structures arising in deterministic and stochastic CFD problems. We take a coarse grain approach to shared-memory parallelism with OpenMP and employ a workload-splitting scheme that can reduce the OpenMP synchronizations to the minimum. 
The hybrid implementation shows good scalability with respect to both the problem size and the number of processors in case of a fixed problem size. With the same number of processors, the hybrid model with 2 (or 4) OpenMP threads per MPI process is observed to perform better than pure MPI and pure OpenMP on the NCSA SGI Origin 2000, while the pure MPI model performs the best on the IBM SP3 at SDSC and on the Compaq Alpha cluster at PSC. A key new result is that the use of threads facilitates effectively prefinement, which is crucial to adaptive discretization using high-order methods.", acknowledgement = ack-nhfb, } @Book{Karniadakis:2002:PSC, author = "George Em Karniadakis and Robert M. Kirby", title = "Parallel Scientific Computing in {C++} and {MPI}: a Seamless Approach to Parallel Algorithms", publisher = pub-CAMBRIDGE, address = pub-CAMBRIDGE:adr, pages = "xi + 616", year = "2002", ISBN = "0-521-52080-0 (paperback), 0-521-81754-4 (hardcover)", ISBN-13 = "978-0-521-52080-5 (paperback), 978-0-521-81754-7 (hardcover)", LCCN = "QA76.58 .K37 2003", bibdate = "Wed Aug 27 06:43:56 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; z3950.loc.gov:7090/Voyager", price = "US\$50.00 (paperback), US\$130.00 (hardcover)", URL = "ftp://uiarchive.cso.uiuc.edu/pub/etext/gutenberg/; http://www.loc.gov/catdir/description/cam031/2002034805.html; http://www.loc.gov/catdir/samples/cam033/2002034805.html; http://www.loc.gov/catdir/toc/cam031/2002034805.html", acknowledgement = ack-nhfb, subject = "Parallel processing (Electronic computers); C++ (Computer program language); Data transmission systems", } @Article{Kasprzyk:2002:APV, author = "Leszek Kasprzyk and Ryszard Nawrowski and Andrzej Tomczewski", title = "Application of a Parallel Virtual Machine for the Analysis of a Luminous Field", journal = j-LECT-NOTES-COMP-SCI, volume = "2474", pages = "122--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", 
bibdate = "Sat Nov 30 20:57:35 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.de/link/service/series/0558/bibs/2474/24740122.htm; http://link.springer.de/link/service/series/0558/papers/2474/24740122.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Keppens:2002:OPM, author = "R. Keppens and G. T{\'o}th", title = "{OpenMP} Parallelism for Multi-dimensional Grid-Adaptive Magnetohydrodynamic Simulations", journal = j-LECT-NOTES-COMP-SCI, volume = "2329", pages = "940--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Sep 10 19:09:34 MDT 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2329.htm; https://www.math.utah.edu/pub/tex/bib/lncs2002a.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2329/23290940.htm; http://link.springer-ny.com/link/service/series/0558/papers/2329/23290940.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Krawezik:2002:SOV, author = "G{\'e}raud Krawezik and Guillaume All{\'e}on and Franck Cappello", title = "{SPMD OpenMP} versus {MPI} on a {IBM SMP} for 3 Kernels of the {NAS} Benchmarks", journal = j-LECT-NOTES-COMP-SCI, volume = "2327", pages = "425--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Sep 10 19:09:32 MDT 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2327.htm; https://www.math.utah.edu/pub/tex/bib/lncs2002a.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2327/23270425.htm; http://link.springer-ny.com/link/service/series/0558/papers/2327/23270425.pdf", acknowledgement = 
ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Krysztop:2002:IFP, author = "Bartosz Krysztop and Henryk Krawczyk", title = "Improving Flexibility and Performance of {PVM} Applications by Distributed Partial Evaluation", journal = j-LECT-NOTES-COMP-SCI, volume = "2474", pages = "376--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Nov 30 20:57:35 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.de/link/service/series/0558/bibs/2474/24740376.htm; http://link.springer.de/link/service/series/0558/papers/2474/24740376.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Langlais:2002:SSM, author = "M. Langlais and G. Latu and J. Roman and P. Silan", title = "Stochastic Simulation of a Marine Host-Parasite System Using a Hybrid {MPI\slash OpenMP} Programming", journal = j-LECT-NOTES-COMP-SCI, volume = "2400", pages = "436--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Sep 10 19:10:14 MDT 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2400.htm; https://www.math.utah.edu/pub/tex/bib/lncs2002c.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2400/24000436.htm; http://link.springer-ny.com/link/service/series/0558/papers/2400/24000436.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Lazzarino:2002:PBP, author = "Oscar Lazzarino and Andrea Sanna and Claudio Zunino and Fabrizio Lamberti", title = "A {PVM}-Based Parallel Implementation of the {REYES} Image Rendering Architecture", journal = j-LECT-NOTES-COMP-SCI, volume = "2474", pages = "165--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 
(print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Nov 30 20:57:35 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.de/link/service/series/0558/bibs/2474/24740165.htm; http://link.springer.de/link/service/series/0558/papers/2474/24740165.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Lee:2002:IPC, author = "Nung Kion Lee and David Taniar and J. Wenny Rahayu and Mafruz Zaman Ashrafi", title = "Implementation of Parallel Collection Equi-Join Using {MPI}", journal = j-LECT-NOTES-COMP-SCI, volume = "2367", pages = "217--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Sep 10 19:09:54 MDT 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2367.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2367/23670217.htm; http://link.springer-ny.com/link/service/series/0558/papers/2367/23670217.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Lopez:2002:ESM, author = "F{\'e}lix C{\'e}sar Garc{\'\i}a L{\'o}pez and Nieves Luz Fr{\'\i}as Arrocha", title = "Expanding the Synchronization Model for {OpenMP}", journal = j-PARALLEL-DIST-COMP-PRACT, volume = "5", number = "2", pages = "169--175", month = jun, year = "2002", CODEN = "????", ISSN = "1097-2803", bibdate = "Thu Sep 2 12:08:56 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.scpe.org/content/5/2.toc", acknowledgement = ack-nhfb, fjournal = "PDCP: Parallel and Distributed Computing Practices", } @Article{Luecke:2002:DDM, author = "Glenn R. 
Luecke and Yan Zou and James Coyle and Jim Hoekstra and Marina Kraeva", title = "Deadlock detection in {MPI} programs", journal = j-CCPE, volume = "14", number = "11", pages = "911--932", day = "25", month = aug, year = "2002", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.701", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Sat Nov 9 12:24:19 MST 2002", bibsource = "http://www.interscience.wiley.com/jpages/1532-0626; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www3.interscience.wiley.com/journalfinder.html", URL = "http://www3.interscience.wiley.com/cgi-bin/abstract/97519209/START; http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=97519209{\&}PLACEBO=IE.pdf", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Prac\-tice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", } @Article{Macias:2002:SEA, author = "Elsa M. Mac{\'\i}as and Alvaro Su{\'a}rez", title = "Solving Engineering Applications with {LAMGAC} over {MPI-2}", journal = j-LECT-NOTES-COMP-SCI, volume = "2474", pages = "130--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Nov 30 20:57:35 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.de/link/service/series/0558/bibs/2474/24740130.htm; http://link.springer.de/link/service/series/0558/papers/2474/24740130.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Mahinthakumar:2002:HMO, author = "G. Mahinthakumar and F. 
Saied", title = "A Hybrid {MPI-OpenMP} Implementation of an Implicit Finite-Element Code on Parallel Architectures", journal = j-IJHPCA, volume = "16", number = "4", pages = "371--393", month = "Winter", year = "2002", CODEN = "IHPCFL", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Fri Nov 28 06:52:13 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/ijsa.bib; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, journal-URL = "http://hpc.sagepub.com/content/by/year", } @Article{Marcos:2002:DDP, author = "Carlos de la Fuente Marcos and Pierre Barge and Ra{\'u}l de la Fuente Marcos", title = "Dust Dynamics in Protoplanetary Disks: Parallel Computing with {PVM}", journal = j-J-COMPUT-PHYS, volume = "176", number = "2", pages = "276--294", day = "1", month = mar, year = "2002", CODEN = "JCTPAH", DOI = "https://doi.org/10.1006/jcph.2001.6978", ISSN = "0021-9991 (print), 1090-2716 (electronic)", ISSN-L = "0021-9991", bibdate = "Mon Jan 2 22:12:14 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/jcomputphys2000.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0021999101969785", acknowledgement = ack-nhfb, fjournal = "Journal of Computational Physics", journal-URL = "http://www.sciencedirect.com/science/journal/00219991", } @Article{Marowka:2002:ISI, author = "Ami Marowka", title = "Introduction to the special issue: {OpenMP}: Experiences, Implementations and Applications", journal = j-PARALLEL-DIST-COMP-PRACT, volume = "5", number = "2", pages = "v--v", month = jun, year = "2002", CODEN = "????", ISSN = "1097-2803", bibdate = "Thu Sep 2 12:08:56 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.scpe.org/content/5/2.toc", acknowledgement = ack-nhfb, fjournal = "PDCP: Parallel and Distributed Computing Practices", } 
@Article{Michailidis:2002:PSL, author = "Panagiotis D. Michailidis and Konstantinos G. Margaritis", title = "A Performance Study of Load Balancing Strategies for Approximate String Matching on an {MPI} Heterogeneous System Environment", journal = j-LECT-NOTES-COMP-SCI, volume = "2474", pages = "432--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Nov 30 20:57:35 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.de/link/service/series/0558/bibs/2474/24740432.htm; http://link.springer.de/link/service/series/0558/papers/2474/24740432.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Mohr:2002:DPP, author = "Bernd Mohr and Allen D. Malony and Sameer Shende and Felix Wolf", title = "Design and Prototype of a Performance Tool Interface for {OpenMP}", journal = j-J-SUPERCOMPUTING, volume = "23", number = "1", pages = "105--128", month = aug, year = "2002", CODEN = "JOSUED", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Wed Jan 14 07:25:20 MST 2004", bibsource = "https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.wkap.nl/journalhome.htm/0920-8542", URL = "http://ipsapp008.kluweronline.com/content/getfile/5189/37/8/abstract.htm; http://ipsapp008.kluweronline.com/content/getfile/5189/37/8/fulltext.pdf", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Article{Muller:2002:SMB, author = "Matthias S. 
M{\"u}ller", title = "A Shared Memory Benchmark in {OpenMP}", journal = j-LECT-NOTES-COMP-SCI, volume = "2327", pages = "380--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Sep 10 19:09:32 MDT 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2327.htm; https://www.math.utah.edu/pub/tex/bib/lncs2002a.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2327/23270380.htm; http://link.springer-ny.com/link/service/series/0558/papers/2327/23270380.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Nakajima:2002:PISa, author = "Kengo Nakajima and Hiroshi Okuda", title = "Parallel Iterative Solvers for Unstructured Grids Using an {OpenMP\slash MPI} Hybrid Programming Model for the {GeoFEM} Platform on {SMP} Cluster Architectures", journal = j-LECT-NOTES-COMP-SCI, volume = "2327", pages = "437--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Sep 10 19:09:32 MDT 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2327.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2327/23270437.htm; http://link.springer-ny.com/link/service/series/0558/papers/2327/23270437.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Nakajima:2002:PISb, author = "Kengo Nakajima and Hiroshi Okuda", title = "Parallel iterative solvers for unstructured grids using a directive\slash {MPI} hybrid programming model for the {GeoFEM} platform on {SMP} cluster architectures", journal = j-CCPE, volume = "14", number = "6--7", pages = "411--429", month = may # "\slash " # jun, year = "2002", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.622", ISSN = "1532-0626 (print), 1532-0634 
(electronic)", ISSN-L = "1532-0626", bibdate = "Sat Nov 9 12:24:19 MST 2002", bibsource = "http://www.interscience.wiley.com/jpages/1532-0626; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www3.interscience.wiley.com/journalfinder.html", URL = "http://www3.interscience.wiley.com/cgi-bin/abstract/94515747/START; http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=94515747{\&}PLACEBO=IE.pdf", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Prac\-tice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", } @Article{Nakano:2002:SCG, author = "Hirofumi Nakano and Kazuhisa Ishizaka and Motoki Obata and Keiji Kimura and Hironori Kasahara", title = "Static Coarse Grain Task Scheduling with Cache Optimization Using {OpenMP}", journal = j-LECT-NOTES-COMP-SCI, volume = "2327", pages = "479--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Sep 10 19:09:32 MDT 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2327.htm; https://www.math.utah.edu/pub/tex/bib/lncs2002a.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2327/23270479.htm; http://link.springer-ny.com/link/service/series/0558/papers/2327/23270479.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Norden:2002:OVM, author = "M. Nord{\'e}n and S. Holmgren and M. 
Thun{\'e}", title = "{OpenMP} versus {MPI} for {PDE} Solvers Based on Regular Sparse Numerical Operators", journal = j-LECT-NOTES-COMP-SCI, volume = "2331", pages = "681--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Sep 10 19:09:36 MDT 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2331.htm; https://www.math.utah.edu/pub/tex/bib/lncs2002b.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2331/23310681.htm; http://link.springer-ny.com/link/service/series/0558/papers/2331/23310681.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Ong:2002:MRS, author = "Emil Ong", title = "{MPI Ruby}: Scripting in a Parallel Environment", journal = j-COMPUT-SCI-ENG, volume = "4", number = "4", pages = "78--82", month = jul # "\slash " # aug, year = "2002", CODEN = "CSENFA", ISSN = "1521-9615 (print), 1558-366X (electronic)", ISSN-L = "1521-9615", bibdate = "Sat Jan 3 18:25:00 MST 2004", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://csdl.computer.org/comp/mags/cs/2002/04/c4078abs.htm; http://csdl.computer.org/dl/mags/cs/2002/04/c4078.htm; http://csdl.computer.org/dl/mags/cs/2002/04/c4078.pdf", acknowledgement = ack-nhfb, fjournal = "Computing in Science and Engineering", journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5992", } @InProceedings{Phillips:2002:NBS, author = "James C. Phillips and Gengbin Zheng and Sameer Kumar and Laxmikant V. 
Kal{\'e}", title = "{NAMD}: Biomolecular Simulation on Thousands of Processors", crossref = "IEEE:2002:STI", pages = "??--??", year = "2002", bibdate = "Wed Nov 26 07:34:20 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sc-2002.org/paperpdfs/pap.pap277.pdf", abstract = "NAMD is a fully featured, production molecular dynamics program for high performance simulation of large biomolecular systems. We have previously, at SC2000, presented scaling results for simulations with cutoff electrostatics on up to 2048 processors of the ASCI Red machine, achieved with an object-based hybrid force and spatial decomposition scheme and an aggressive measurement-based predictive load balancing framework. We extend this work by demonstrating similar scaling on the much faster processors of the PSC Lemieux Alpha cluster, and for simulations employing efficient (order N log N) particle mesh Ewald full electrostatics. This unprecedented scalability in a biomolecular simulation code has been attained through latency tolerance, adaptation to multiprocessor nodes, and the direct use of the Quadrics Elan library in place of MPI by the Charm++/Converse parallel runtime system.", acknowledgement = ack-nhfb, } @Article{Piriyakumar:2002:EFI, author = "Douglas Antony Louis Piriyakumar and Paul Levi and Rolf Rabenseifner", title = "Enhanced File Interoperability with Parallel {MPI} File-{I/O} in Image Processing", journal = j-LECT-NOTES-COMP-SCI, volume = "2474", pages = "174--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Nov 30 20:57:35 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.de/link/service/series/0558/bibs/2474/24740174.htm; http://link.springer.de/link/service/series/0558/papers/2474/24740174.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture 
Notes in Computer Science", } @Article{Plachetka:2002:QTS, author = "Tomas Plachetka", title = "(Quasi-) Thread-Safe {PVM} and (Quasi-) Thread-Safe {MPI} without Active Polling", journal = j-LECT-NOTES-COMP-SCI, volume = "2474", pages = "296--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Nov 30 20:57:35 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.de/link/service/series/0558/bibs/2474/24740296.htm; http://link.springer.de/link/service/series/0558/papers/2474/24740296.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Prabhakar:2002:PCB, author = "Achal Prabhakar and Vladimir Getov and Barbara Chapman", title = "Performance Comparisons of Basic {OpenMP} Constructs", journal = j-LECT-NOTES-COMP-SCI, volume = "2327", pages = "413--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Sep 10 19:09:32 MDT 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2327.htm; https://www.math.utah.edu/pub/tex/bib/lncs2002a.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2327/23270413.htm; http://link.springer-ny.com/link/service/series/0558/papers/2327/23270413.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @InProceedings{Rauber:2002:LSH, author = "Thomas Rauber and Gudula R{\"u}nger", title = "Library Support for Hierarchical Multi-Processor Tasks", crossref = "IEEE:2002:STI", pages = "??--??", year = "2002", bibdate = "Wed Nov 26 07:34:20 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sc-2002.org/paperpdfs/pap.pap176.pdf", abstract = "The paper considers the modular programming with 
hierarchically structured multi-processor tasks on top of SPMD tasks for distributed memory machines. The parallel execution requires a corresponding decomposition of the set of processors into a hierarchical group structure onto which the tasks are mapped. This results in a multi-level group SPMD computation model with varying processor group structures. The advantage of this kind of mixed task and data parallelism is a potential to reduce the communication overhead and to increase scalability. We present a runtime library to support the coordination of hierarchically structured multi-processor tasks. The library exploits an extended parallel group SPMD programming model and manages the entire task execution including the dynamic hierarchy of processor groups. The library is built on top of MPI, has an easy-to-use interface, and leads to only a marginal overhead while allowing static planning and dynamic restructuring. Keywords: mixed task and data parallelism, multiprocessor tasks, multilevel group SPMD, hierarchical decomposition of processor sets, library support, distributed memory", acknowledgement = ack-nhfb, } @Article{Reussner:2002:SCB, author = "Ralf Reussner and Peter Sanders and Jesper Larsson Tr{\"a}ff", title = "{SKaMPI}: a comprehensive benchmark for public benchmarking of {MPI}", journal = j-SCI-PROG, volume = "10", number = "1", pages = "55--65", year = "2002", CODEN = "SCIPEV", ISSN = "1058-9244 (print), 1875-919X (electronic)", ISSN-L = "1058-9244", bibdate = "Sat Oct 26 14:52:27 MDT 2002", bibsource = "http://www.iospress.nl/site/html/10589244.html; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://iospress.metapress.com/app/home/contribution.asp%3Fwasp=9ejnuvwuvby9737jte27%26referrer=parent%26backto=issue%2C6%2C9%3Bjournal%2C2%2C12%3Blinkingpublicationresults%2C1%2C1", acknowledgement = ack-nhfb, fjournal = "Scientific Programming", journal-URL = "http://iospress.metapress.com/content/1058-9244", } @Article{Sack:2002:FMB, author = 
"Paul Sack and Anne C. Elster", title = "Fast {MPI} Broadcasts through Reliable Multicasting", journal = j-LECT-NOTES-COMP-SCI, volume = "2367", pages = "445--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Sep 10 19:09:54 MDT 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2367.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2367/23670445.htm; http://link.springer-ny.com/link/service/series/0558/papers/2367/23670445.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Selikhov:2002:MCC, author = "Anton Selikhov and George Bosilca and Cecile Germain and Gilles Fedak and Franck Cappello", title = "{MPICH-CM}: a Communication Library Design for a {P2P MPI} Implementation", journal = j-LECT-NOTES-COMP-SCI, volume = "2474", pages = "323--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Nov 30 20:57:35 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.de/link/service/series/0558/bibs/2474/24740323.htm; http://link.springer.de/link/service/series/0558/papers/2474/24740323.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Shires:2002:EHM, author = "D. Shires and R. 
Mohan", title = "An Evaluation of {HPF} and {MPI} Approaches and Performance in Unstructured Finite Element Simulations", journal = "Journal of Mathematical Modelling and Algorithms", volume = "1", number = "3", publisher = "Kluwer Academic Publishers, Dordrecht, The Netherlands", pages = "153--167", year = "2002", CODEN = "????", ISSN = "1570-1166", bibdate = "Sat Dec 7 09:42:43 MST 2002", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; Ingenta database", acknowledgement = ack-nhfb, pagecount = "15", } @InProceedings{Sistare:2002:UHP, author = "Steven J. Sistare and Christopher J. Jackson", title = "Ultra-High Performance Communication with {MPI} and the {Sun Fire(\TM)} Link Interconnect", crossref = "IEEE:2002:STI", pages = "??--??", year = "2002", bibdate = "Wed Nov 26 07:34:20 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sc-2002.org/paperpdfs/pap.pap142.pdf", abstract = "We present a new low-latency system area network that provides the ultra-high bandwidth needed to fuse a collection of large SMP servers into a capability cluster. The network adapter exports a remote shared memory (RSM) model that supports low latency kernel bypass messaging. The Sun\TM{} MPI library uses the RSM interface to implement a highly efficient memory-to-memory messaging protocol in which the library directly manages buffers and data structures in remote memory. This allows flexible allocation of buffer space to active connections, while avoiding resource contention that could otherwise increase latencies. We discuss the characteristics of the interconnect, describe the MPI protocols, and measure the performance of a number of MPI benchmarks. 
Our results include MPI inter-node bandwidths of almost 3 Gigabytes per second and MPI ping-pong latencies as low as 3.7 microseconds.", acknowledgement = ack-nhfb, keywords = "interconnects; kernel bypass; MPI; performance evaluation; remote shared memory; SAN", } @Article{Smyk:2002:AMM, author = "Adam Smyk and Marek Tudruj", title = "Application of Mixed {{\em MPI OpenMP\/}} Programming in a Multi {SMP} Cluster Computer", journal = j-LECT-NOTES-COMP-SCI, volume = "2328", pages = "288--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Thu Sep 12 08:34:49 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2328.htm; https://www.math.utah.edu/pub/tex/bib/lncs2002a.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2328/23280288.htm; http://link.springer-ny.com/link/service/series/0558/papers/2328/23280288.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Smyk:2002:OMP, author = "Adam Smyk and Marek Tudruj", title = "{\em {OpenMP\/}} / {\em {MPI\/}} Programming in a Multi-cluster System Based on Shared Memory\slash Message Passing Communication", journal = j-LECT-NOTES-COMP-SCI, volume = "2326", pages = "241--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Sep 10 19:09:32 MDT 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2326.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2326/23260241.htm; http://link.springer-ny.com/link/service/series/0558/papers/2326/23260241.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Stpiczynski:2002:PPO, author = "Przemyslaw Stpiczynski", title = "{Parallel Programming in OpenMP} Helps Novices: a review of 
{Parallel Programming in OpenMP} by {Rohit Chandra}, {Leonardo Dagum}, {Dave Kohr}, {Dror Maydan}, {Jeff McDonald}, and {Ramesh Menon}", journal = j-IEEE-DISTRIB-SYST-ONLINE, volume = "3", number = "8", year = "2002", ISSN = "1541-4922 (print), 1558-1683 (electronic)", ISSN-L = "1541-4922", bibdate = "Wed Oct 23 17:47:56 2002", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://dsonline.computer.org/0208/d/bks_a.htm", acknowledgement = ack-nhfb, fjournal = "IEEE Distributed Systems Online", } @Article{Takahashi:2002:PEH, author = "Daisuke Takahashi and Mitsuhisa Sato and Taisuke Boku", title = "Performance Evaluation of the {Hitachi SR8000} Using {OpenMP} Benchmarks", journal = j-LECT-NOTES-COMP-SCI, volume = "2327", pages = "390--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Sep 10 19:09:32 MDT 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2327.htm; https://www.math.utah.edu/pub/tex/bib/lncs2002a.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2327/23270390.htm; http://link.springer-ny.com/link/service/series/0558/papers/2327/23270390.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Thakur:2002:ONA, author = "Rajeev Thakur and William Gropp and Ewing Lusk", title = "Optimizing noncontiguous accesses in {MPI-IO}", journal = j-PARALLEL-COMPUTING, volume = "28", number = "1", pages = "83--105", month = jan, year = "2002", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Fri Feb 22 16:52:43 MST 2002", bibsource = "http://www.elsevier.com/locate/issn/01678191; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.elsevier.com/gej-ng/10/35/21/60/27/32/abstract.html; http://www.elsevier.nl/gej-ng/10/35/21/60/27/32/00001686.pdf", acknowledgement = ack-nhfb, 
fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Tian:2002:IOC, author = "Xinmin Tian and Aart Bik and Milind Girkar and Paul Grey and Hideki Saito and Ernesto Su", title = "{Intel\reg{}} {OpenMP C++\slash Fortran} Compiler for Hyper-Threading Technology: Implementation and Performance", journal = j-INTEL-TECH-J, volume = "6", number = "1", pages = "36--46", month = feb, year = "2002", ISSN = "1535-766X", bibdate = "Thu Feb 28 15:24:21 2002", bibsource = "https://www.math.utah.edu/pub/tex/bib/intel-tech-j.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://developer.intel.com/technology/itj/2002/volume06issue01/vol6iss1_hyper_threading_technology.pdf", } @Article{Traff:2002:IMA, author = "Jesper Larsson Tr{\"a}ff", title = "Improved {MPI} All-to-all Communication on a {Giganet SMP} Cluster", journal = j-LECT-NOTES-COMP-SCI, volume = "2474", pages = "392--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Nov 30 20:57:35 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.de/link/service/series/0558/bibs/2474/24740392.htm; http://link.springer.de/link/service/series/0558/papers/2474/24740392.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @InProceedings{Traff:2002:IMP, author = "Jesper Larsson Tr{\"a}ff", title = "Implementing the {MPI} Process Topology Mechanism", crossref = "IEEE:2002:STI", pages = "??--??", year = "2002", bibdate = "Wed Nov 26 07:34:20 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sc-2002.org/paperpdfs/pap.pap122.pdf", abstract = "The topology functionality of the Message Passing Interface (MPI) provides a portable, architecture-independent means for adapting application programs to the communication
architecture of the target hardware. However, current MPI implementations rarely go beyond the most trivial implementation, and simply performs no process remapping. We discuss the potential of the topology mechanism for systems with a hierarchical communication architecture like clusters of SMP nodes. The MPI topology functionality is a weak mechanism, and we argue about some of its shortcomings. We formulate the topology optimization problem as a graph embedding problem, and show that for hierarchical systems it can be solved by graph partitioning. We state the properties of a new heuristic for solving both the embedding problem and the ``easier'' graph partitioning problem. The graph partitioning based framework has been fully implemented in MPI/SX for the NEC SX-series of parallel vector computers. MPI/SX is thus one of very few MPI implementations with a non-trivial topology functionality. On a 4 node NEC SX-6 significant communication performance improvements are achieved with synthetic MPI benchmarks.", acknowledgement = ack-nhfb, } @Article{Truong:2002:PAM, author = "Hong-Linh Truong and Thomas Fahringer and Michael Geissler and Georg Madsen", title = "Performance Analysis for {MPI} Applications with {SCALEA}", journal = j-LECT-NOTES-COMP-SCI, volume = "2474", pages = "421--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Nov 30 20:57:35 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.de/link/service/series/0558/bibs/2474/24740421.htm; http://link.springer.de/link/service/series/0558/papers/2474/24740421.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Uehara:2002:MBP, author = "Hitoshi Uehara and Masanori Tamura and Mitsuo Yokokawa", title = "An {MPI} Benchmark Program Library and Its Application to the {Earth} 
Simulator", journal = j-LECT-NOTES-COMP-SCI, volume = "2327", pages = "219--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Tue Sep 10 19:09:32 MDT 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2327.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2327/23270219.htm; http://link.springer-ny.com/link/service/series/0558/papers/2327/23270219.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @InProceedings{Vadhiyar:2002:PMS, author = "Sathish S. Vadhiyar and Graham E. Fagg and Jack J. Dongarra", title = "Performance Modeling for Self Adapting Collective Communications for {MPI}", crossref = "Oldehoeft:2002:SIS", pages = "??--??", year = "2002", bibdate = "Tue Feb 26 06:44:44 2002", bibsource = "https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.netlib.org/utk/people/JackDongarra/PAPERS/coll-lacsi-2001.pdf", acknowledgement = ack-nhfb, keywords = "Los Alamos Computer Science Institute (LACSI)", xxbooktitle = "LACSI Symposium 2001, October 15--18, Eldorado Hotel, Santa Fe, NM", } @Article{Vetter:2002:DSP, author = "Jeffrey Vetter", title = "Dynamic statistical profiling of communication activity in distributed applications", journal = j-SIGMETRICS, volume = "30", number = "1", pages = "240--250", month = jun, year = "2002", CODEN = "????", DOI = "https://doi.org/10.1145/511334.511364", ISSN = "0163-5999 (print), 1557-9484 (electronic)", ISSN-L = "0163-5999", bibdate = "Thu Jun 26 11:38:22 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "Performance analysis of communication activity for a terascale application with traditional message tracing can be overwhelming in terms of overhead, perturbation, and storage.
We propose a novel alternative that enables dynamic statistical profiling of an application's communication activity using message sampling. We have implemented an operational prototype, named PHOTON, and our evidence shows that this new approach can provide an accurate, low-overhead, tractable alternative for performance analysis of communication activity. PHOTON consists of two components: a Message Passing Interface (MPI) profiling layer that implements sampling and analysis, and a modified MPI runtime that appends a small but necessary amount of information to individual messages. More importantly, this alternative enables an assortment of runtime analysis techniques so that, in contrast to post-mortem, trace-based techniques, the raw performance data can be jettisoned immediately after analysis. Our investigation shows that message sampling can reduce overhead to imperceptible levels for many applications. Experiments on several applications demonstrate the viability of this approach. For example, with one application, our technique reduced the analysis overhead from 154\% for traditional tracing to 6\% for statistical profiling. We also evaluate different sampling techniques in this framework. The coverage of the sample space provided by purely random sampling is superior to counter- and timer-based sampling. Also, PHOTON's design reveals that frugal modifications to the MPI runtime system could facilitate such techniques on production computing systems, and it suggests that this sampling technique could execute continuously for long-running applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGMETRICS Performance Evaluation Review", journal-URL = "http://portal.acm.org/toc.cfm?id=J618", } @InProceedings{Vetter:2002:EPE, author = "Jeffrey S. 
Vetter and Andy Yoo", title = "An Empirical Performance Evaluation of Scalable Scientific Applications", crossref = "IEEE:2002:STI", pages = "??--??", year = "2002", bibdate = "Wed Nov 26 07:34:20 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sc-2002.org/paperpdfs/pap.pap222.pdf", abstract = "We investigate the scalability, architectural requirements, and performance characteristics of eight scalable scientific applications. Our analysis is driven by empirical measurements using statistical and tracing instrumentation for both communication and computation. Based on these measurements, we refine our analysis into precise explanations of the factors that influence performance and scalability for each application; we distill these factors into common traits and overall recommendations for both users and designers of scalable platforms. Our experiments demonstrate that some traits, such as improvements in the scaling and performance of MPI's collective operations, will benefit most applications. We also find specific characteristics of some applications that limit performance. For example, one application's intensive use of a 64-bit, floating-point divide instruction, which has high latency and is not pipelined on the POWER3, limits the performance of the application's primary computation.", acknowledgement = ack-nhfb, } @Article{Wallcraft:2002:CCA, author = "Alan J. 
Wallcraft", title = "A Comparison of {Co-Array Fortran} and {OpenMP Fortran} for {SPMD} Programming", journal = j-J-SUPERCOMPUTING, volume = "22", number = "3", pages = "231--250", month = jul, year = "2002", CODEN = "JOSUED", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Wed Jan 14 07:25:19 MST 2004", bibsource = "https://www.math.utah.edu/pub/tex/bib/fortran3.bib; https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.wkap.nl/journalhome.htm/0920-8542", URL = "http://ipsapp008.kluweronline.com/content/getfile/5189/36/1/abstract.htm; http://ipsapp008.kluweronline.com/content/getfile/5189/36/1/fulltext.pdf", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Article{Wang:2002:OPG, author = "Ping Wang", title = "{OpenMP} programming for a global inverse model", journal = j-SCI-PROG, volume = "10", number = "3", pages = "253--261", year = "2002", CODEN = "SCIPEV", ISSN = "1058-9244 (print), 1875-919X (electronic)", ISSN-L = "1058-9244", bibdate = "Sat Oct 26 15:08:19 2002", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sciprogram.bib", acknowledgement = ack-nhfb, fjournal = "Scientific Programming", journal-URL = "http://iospress.metapress.com/content/1058-9244", } @Article{Worsch:2002:BCM, author = "Thomas Worsch and Ralf Reussner and Werner Augustin", title = "On Benchmarking Collective {MPI} Operations", journal = j-LECT-NOTES-COMP-SCI, volume = "2474", pages = "271--??", year = "2002", CODEN = "LNCSD9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Sat Nov 30 20:57:35 MST 2002", bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.de/link/service/series/0558/bibs/2474/24740271.htm; 
http://link.springer.de/link/service/series/0558/papers/2474/24740271.pdf", acknowledgement = ack-nhfb, fjournal = "Lecture Notes in Computer Science", } @Article{Addison:2003:OIA, author = "C. Addison and Y. Ren and M. van Waveren", title = "{OpenMP} issues arising in the development of parallel {BLAS} and {LAPACK} libraries", journal = j-SCI-PROG, volume = "11", number = "2", pages = "95--104", year = "2003", CODEN = "SCIPEV", ISSN = "1058-9244 (print), 1875-919X (electronic)", ISSN-L = "1058-9244", bibdate = "Mon Jan 12 06:28:15 MST 2004", bibsource = "http://www.iospress.nl/site/html/10589244.html; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sciprogram.bib", acknowledgement = ack-nhfb, fjournal = "Scientific Programming", journal-URL = "http://iospress.metapress.com/content/1058-9244", } @TechReport{Amestoy:2003:IIMa, author = "Patrick R. Amestoy and Iain S. Duff and Jean-Yves L'Excellent and Xiaoye S. Li", title = "Impact of the implementation of {MPI} point-to-point communications on the performance of two general sparse solvers", type = "Report", number = "TR/PA/03/14 and RR-4372 and LBNL-48968 and RT/APO/01/4", institution = inst-CERFACS, address = inst-CERFACS:adr, pages = "????", year = "2003", bibdate = "Tue Jan 03 06:25:11 2006", bibsource = "https://www.math.utah.edu/pub/bibnet/authors/d/duff-iain-s.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Article{Amestoy:2003:IIMb, author = "Patrick R. Amestoy and Iain S. Duff and Jean-Yves L'Excellent and Xiaoye S. 
Li", title = "Impact of the implementation of {MPI} point-to-point communications on the performance of two general sparse solvers", journal = j-PARALLEL-COMPUTING, volume = "29", number = "7", pages = "833--849", month = jul, year = "2003", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Wed Dec 24 09:07:26 MST 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Anonymous:2003:MNIc, author = "Anonymous", title = "Micro News: {IBM} ups the ante in silicon transistor speed; New benchmark suite based on high-performance computing applications, {MPI} and {OpenMP} [{SPEC HPC2002}]; {EU} {OKs} {Hitachi}, {Mitsubishi Electric} semiconductor joint venture; {Intel} launches {Pentium 4} at {3.06 GHz}; {TSMC} unveils viable 25nm transistors", journal = j-IEEE-MICRO, volume = "23", number = "1", pages = "6--6, 87", month = jan # "\slash " # feb, year = "2003", CODEN = "IEMIDZ", ISSN = "0272-1732 (print), 1937-4143 (electronic)", ISSN-L = "0272-1732", bibdate = "Wed Apr 23 18:57:10 MDT 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeemicro.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://dlib.computer.org/mi/books/mi2003/pdf/m1006.pdf", acknowledgement = ack-nhfb, fjournal = "IEEE Micro", journal-URL = "http://www.computer.org/csdl/mags/mi/index.html", } @Article{Barekas:2003:MAO, author = "Vasileios K. Barekas and Panagiotis E. Hadjidoukas and Eleftherios D. 
Polychronopoulos and others", title = "A Multiprogramming Aware {OpenMP} Implementation", journal = j-SCI-PROG, volume = "11", number = "2", pages = "133--141", year = "2003", CODEN = "SCIPEV", ISSN = "1058-9244 (print), 1875-919X (electronic)", ISSN-L = "1058-9244", bibdate = "Mon Jan 12 06:28:15 MST 2004", bibsource = "http://www.iospress.nl/site/html/10589244.html; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sciprogram.bib", acknowledgement = ack-nhfb, fjournal = "Scientific Programming", journal-URL = "http://iospress.metapress.com/content/1058-9244", } @InProceedings{Bouteiller:2003:MVF, author = "Aurelien Bouteiller and Franck Cappello and Thomas Herault and Geraud Krawezik and Pierre Lemarinier and Frederic Magniette", title = "{MPICH-V2}: a Fault Tolerant {MPI} for Volatile Nodes based on Pessimistic Sender Based Message Logging", crossref = "ACM:2003:SII", pages = "??--??", year = "2003", bibdate = "Wed Nov 26 07:34:20 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sc-conference.org/sc2003/inter_cal/inter_cal_detail.php?eventid=10696#1; http://www.sc-conference.org/sc2003/paperpdfs/pap209.pdf", abstract = "Execution of MPI applications on clusters and Grid deployments suffering from node and network failures motivates the use of fault tolerant MPI implementations. We present MPICH-V2 (the second protocol of MPICHV project), an automatic fault tolerant MPI implementation using an innovative protocol that removes the most limiting factor of the pessimistic message logging approach: reliable logging of in transit messages. MPICH-V2 relies on uncoordinated checkpointing, sender based message logging and remote reliable logging of message logical clocks. This paper presents the architecture of MPICH-V2, its theoretical foundation and the performance of the implementation. 
We compare MPICH-V2 to MPICH-V1 and MPICH-P4 evaluating (a) its point-to-point performance, (b) the performance for the NAS benchmarks, (c) the application performance when many faults occur during the execution. Experimental results demonstrate that MPICH-V2 provides performance close to MPICH-P4 for applications using large messages while reducing dramatically the number of reliable nodes compared to MPICH-V1.", acknowledgement = ack-nhfb, } @Article{Brightwell:2003:DIP, author = "Ron Brightwell and Rolf Riesen and Arthur B. Maccabe", title = "Design, Implementation, and Performance of {MPI} on {Portals 3.0}", journal = j-IJHPCA, volume = "17", number = "1", pages = "7--20", month = "Spring", year = "2003", CODEN = "IHPCFL", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Fri Nov 28 06:52:13 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, journal-URL = "http://hpc.sagepub.com/content/by/year", } @Article{Briguglio:2003:PPM, author = "Sergio Briguglio and Beniamino {Di Martino} and Gregorio Vlad", title = "A performance-prediction model for {PIC} applications on clusters of Symmetric MultiProcessors: Validation with hierarchical {HPF $+$ OpenMP} implementation", journal = j-SCI-PROG, volume = "11", number = "2", pages = "159--176", year = "2003", CODEN = "SCIPEV", ISSN = "1058-9244 (print), 1875-919X (electronic)", ISSN-L = "1058-9244", bibdate = "Mon Jan 12 06:28:15 MST 2004", bibsource = "http://www.iospress.nl/site/html/10589244.html; https://www.math.utah.edu/pub/tex/bib/fortran3.bib; https://www.math.utah.edu/pub/tex/bib/hpfortran.bib; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sciprogram.bib", acknowledgement = ack-nhfb, fjournal = "Scientific Programming", journal-URL = "http://iospress.metapress.com/content/1058-9244", } @Article{Bronevetsky:2003:AAL, author = "Greg 
Bronevetsky and Daniel Marques and Keshav Pingali and Paul Stodghill", title = "Automated application-level checkpointing of {MPI} programs", journal = j-SIGPLAN, volume = "38", number = "10", pages = "84--94", month = oct, year = "2003", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 22 16:52:42 MST 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIG{\-}PLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", } @Article{Carson:2003:CGU, author = "Brett Carson and Robert Murison and Ian A. Mason", title = "Computational Gains Using {RPVM} on a {Beowulf} Cluster", journal = j-R-NEWS, volume = "3", number = "1", pages = "21--26", month = jun, year = "2003", CODEN = "????", ISSN = "1609-3631", ISSN-L = "1609-3631", bibdate = "Thu Aug 13 09:25:10 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/rjournal.bib", URL = "http://CRAN.R-project.org/doc/Rnews/", acknowledgement = ack-r-project, fjournal = "R News: the Newsletter of the R Project", journal-URL = "http://journal.r-project.org/", pdf = Rnews2003-1, } @Article{Chen:2003:GMD, author = "L. Chen and C.-L. Wang and F. C. M. Lau", title = "A Grid Middleware for Distributed {Java} Computing with {MPI} Binding and Process Migration Supports", journal = j-J-COMP-SCI-TECH, volume = "18", number = "4", pages = "505--514", year = "2003", CODEN = "JCTEEM", ISSN = "1000-9000", ISSN-L = "1000-9000", bibdate = "Wed Aug 27 05:49:07 MDT 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; Ingenta database", acknowledgement = ack-nhfb, fjournal = "Journal of computer science and technology", } @InProceedings{Coll:2003:SHB, author = "Salvador Coll and Jose Duato and Fabrizio Petrini and Francisco J.
Mora", title = "Scalable Hardware-Based Multicast Trees", crossref = "ACM:2003:SII", pages = "??--??", year = "2003", bibdate = "Wed Nov 26 07:34:20 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sc-conference.org/sc2003/inter_cal/inter_cal_detail.php?eventid=10702#2; http://www.sc-conference.org/sc2003/paperpdfs/pap300.pdf", abstract = "This paper presents an algorithm for implementing optimal hardware-based multicast trees, on networks that provide hardware support for collective communication. Although the proposed methodology can be generalized to a wide class of networks, we apply our methodology to the Quadrics network, a state-of-the-art network that provides hardware-based multicast communication. The proposed mechanism is intended to improve the performance of the collective communication patterns on the network, in those cases where the hardware support can not be directly used, for instance, due to some faulty nodes. This scheme provides significant reduction on multicast latencies compared to the original system primitives, which use multicast trees based on unicast communication. A backtracking algorithm to find the optimal solution to the problem is presented. In addition, a greedy algorithm is presented and shown to provide near optimal solutions. Finally, our experimental results show the good performance and scalability of the proposed multicast tree in comparison to the traditional unicast-based multicast trees. 
Our multicast mechanism doubles barrier synchronization and broadcasts performance when compared to the production-level MPI library.", acknowledgement = ack-nhfb, } @Article{Cooperman:2003:UTC, author = "Gene Cooperman and Henri Casanova and Jim Hayes and Thomas Witzel", title = "Using {TOP-C} and {AMPIC} to port large parallel applications to the {Computational Grid}", journal = j-FUT-GEN-COMP-SYS, volume = "19", number = "4", pages = "587--596", month = may, year = "2003", CODEN = "FGSEVI", ISSN = "0167-739X (print), 1872-7115 (electronic)", ISSN-L = "0167-739X", bibdate = "Sat Jan 10 10:03:33 MST 2004", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Future Generation Computer Systems", journal-URL = "http://www.sciencedirect.com/science/journal/0167739X", remark = "Selected papers from the IEEE/ACM International Symposium on Cluster Computing and the Grid, Berlin-Brandenburg Academy of Sciences and Humanities, Berlin, Germany, 21--24 May 2002.", } @Article{Czarnul:2003:PTA, author = "Pawel Czarnul", title = "Programming, Tuning and Automatic Parallelization of Irregular Divide-and-Conquer Applications in {DAMPVM\slash DAC}", journal = j-IJHPCA, volume = "17", number = "1", pages = "77--93", month = "Spring", year = "2003", CODEN = "IHPCFL", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Fri Nov 28 06:52:13 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, journal-URL = "http://hpc.sagepub.com/content/by/year", } @Article{DePasquale:2003:UJU, author = "C. J. 
DePasquale", title = "Using the {JVMPI} to Understand the Behavior of {Java} Classes During the Development Process", journal = "Cmg", volume = "2", number = "??", publisher = "Computer Measurement Group", pages = "821--832", year = "2003", CODEN = "????", bibdate = "Sat Apr 3 08:12:24 MST 2004", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; Ingenta database", acknowledgement = ack-nhfb, } @InProceedings{Fernandez:2003:BMN, author = "Juan Fernandez and Eitan Frachtenberg and Fabrizio Petrini", title = "{BCS-MPI}: a New Approach in the System Software Design for Large-Scale Parallel Computers", crossref = "ACM:2003:SII", pages = "??--??", year = "2003", bibdate = "Wed Nov 26 07:34:20 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sc-conference.org/sc2003/inter_cal/inter_cal_detail.php?eventid=10716#1; http://www.sc-conference.org/sc2003/paperpdfs/pap306.pdf", abstract = "Buffered CoScheduled MPI (BCS-MPI) introduces a new approach to design the communication layer for large-scale parallel machines. The emphasis of BCS-MPI is on the global coordination of a large number of communicating processes rather than on the traditional optimization of the point-to-point performance. BCS-MPI delays the interprocessor communication in order to schedule globally the communication pattern and it is designed on top of a minimal set of collective communication primitives. In this paper we describe a prototype implementation of BCS-MPI and its communication protocols. Several experimental results, executed on a set of scientific applications, show that BCS-MPI can compete with a production-level MPI implementation, but is much simpler to implement, debug and model. 
Keywords: MPI, buffered coscheduling, STORM, Quadrics, system software, communication protocols, cluster computing, large-scale parallel computers.", acknowledgement = ack-nhfb, } @InProceedings{Gabriel:2003:EPM, author = "Edgar Gabriel and Graham Fagg and Jack Dongarra", title = "Evaluating the Performance of {MPI-2} Dynamic Communicators and One-Sided Communication", crossref = "Dongarra:2003:RAP", pages = "??--??", year = "2003", bibdate = "Tue Jan 13 18:15:48 2004", bibsource = "https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.netlib.org/netlib/utk/people/JackDongarra/PAPERS/europvm-mpi-2003-mpi2.pdf", acknowledgement = ack-nhfb, } @InProceedings{Gabriel:2003:FTC, author = "Edgar Gabriel and Graham E. Fagg and Antonin Bukovsky and Thara Angskun and Jack J. Dongarra", editor = "????", booktitle = "{17th Annual ACM International Conference on Supercomputing (ICS'03) International Workshop on Grid Computing and e-Science, June 21, 2003, San Francisco}", title = "A Fault-Tolerant Communication Library for {Grid} Environments", publisher = "????", address = "????", pages = "??--??", year = "2003", ISBN = "????", ISBN-13 = "????", LCCN = "????", bibdate = "Tue Jan 13 18:14:32 2004", bibsource = "https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.netlib.org/netlib/utk/people/JackDongarra/PAPERS/FTMPI-SF-gabriel.pdf", acknowledgement = ack-nhfb, xxcrossref = "ACM:2003:CPI", } @Article{Gao:2003:LSP, author = "Shiwu Gao", title = "Linear-scaling parallelization of the {WIEN} package with {MPI}", journal = j-COMP-PHYS-COMM, volume = "153", number = "2", pages = "190--198", day = "15", month = jun, year = "2003", CODEN = "CPHCBZ", DOI = "https://doi.org/10.1016/S0010-4655(03)00224-8", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Mon Feb 13 23:41:30 MST 2012", bibsource 
= "https://www.math.utah.edu/pub/tex/bib/compphyscomm2000.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465503002248", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Jin:2003:AMP, author = "Haoqiang Jin and Gabriele Jost and Jerry Yan and others", title = "Automatic multilevel parallelization using {OpenMP}", journal = j-SCI-PROG, volume = "11", number = "2", pages = "177--190", year = "2003", CODEN = "SCIPEV", ISSN = "1058-9244 (print), 1875-919X (electronic)", ISSN-L = "1058-9244", bibdate = "Mon Jan 12 06:28:15 MST 2004", bibsource = "http://www.iospress.nl/site/html/10589244.html; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sciprogram.bib", acknowledgement = ack-nhfb, fjournal = "Scientific Programming", journal-URL = "http://iospress.metapress.com/content/1058-9244", } @Article{Karonis:2003:MGG, author = "Nicholas T. Karonis and Brian Toonen and Ian Foster", title = "{MPICH-G2}: a {Grid}-enabled implementation of the {Message Passing Interface}", journal = j-J-PAR-DIST-COMP, volume = "63", number = "5", pages = "551--563", month = may, year = "2003", CODEN = "JPDCER", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Tue Dec 16 16:10:41 MST 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Article{Karwande:2003:CMC, author = "Amit Karwande and Xin Yuan and David K. 
Lowenthal", title = "{CC--MPI}: a compiled communication capable {MPI} prototype for {Ethernet} switched clusters", journal = j-SIGPLAN, volume = "38", number = "10", pages = "95--106", month = oct, year = "2003", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 22 16:52:42 MST 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIG{\-}PLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", } @InProceedings{Kee:2003:POP, author = "Yang-Suk Kee and Jin-Soo Kim and Soonhoi Ha", title = "{ParADE}: An {OpenMP} Programming Environment for {SMP} Cluster Systems", crossref = "ACM:2003:SII", pages = "??--??", year = "2003", bibdate = "Wed Nov 26 07:34:20 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/linux.bib; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/supercomputing2003.bib", URL = "http://www.sc-conference.org/sc2003/inter_cal/inter_cal_detail.php?eventid=10708#0; http://www.sc-conference.org/sc2003/paperpdfs/pap130.pdf", abstract = "Demand for programming environments to exploit clusters of symmetric multiprocessors (SMPs) is increasing. In this paper, we present a new programming environment, called ParADE, to enable easy, portable, and high-performance programming on SMP clusters. It is an OpenMP programming environment on top of a multi-threaded software distributed shared memory (SDSM) system with a variant of home-based lazy release consistency protocol. To boost performance, the runtime system provides explicit message-passing primitives to make it a hybrid-programming environment. 
Collective communication primitives are used for the synchronization and work-sharing directives associated with small data structures, lessening the synchronization overhead and avoiding the implicit barriers of work-sharing directives. The OpenMP translator bridges the gap between the OpenMP abstraction and the hybrid programming interfaces of the runtime system. The experiments with several NAS benchmarks and applications on a Linux-based cluster show promising results that ParADE overcomes the performance problem of the conventional SDSM-based OpenMP environment.", acknowledgement = ack-nhfb, keywords = "hybrid programming; MPI; OpenMP; programming environment; SMP cluster; software distributed shared memory", } @Article{Keller:2003:TEE, author = "Rainer Keller and Edgar Gabriel and Bettina Krammer and Matthias S. M{\"u}ller and Michael M. Resch", title = "Towards Efficient Execution of {MPI} Applications on the {Grid}: Porting and Optimization Issues", journal = j-J-GRID-COMP, volume = "1", number = "2", pages = "133--149", month = "????", year = "2003", CODEN = "????", ISSN = "1570-7873 (print), 1572-9184 (electronic)", ISSN-L = "1570-7873", bibdate = "Sat Dec 4 11:39:31 MST 2004", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.wkap.nl/jrnltoc.htm/1570-7873", URL = "http://ipsapp008.kluweronline.com/IPS/content/ext/x/J/6160/I/4/A/4/abstract.htm", acknowledgement = ack-nhfb, fjournal = "Journal of Grid Computing", journal-URL = "http://link.springer.com/journal/10723", } @InProceedings{Komatitsch:2003:BDF, author = "Dimitri Komatitsch and Seiji Tsuboi and Chen Ji and Jeroen Tromp", title = "A 14.6 billion degrees of freedom, 5 teraflops, 2.5 terabyte earthquake simulation on the {Earth Simulator}", crossref = "ACM:2003:SII", pages = "??--??", year = "2003", bibdate = "Wed Nov 26 07:34:20 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = 
"http://www.sc-conference.org/sc2003/inter_cal/inter_cal_detail.php?eventid=10711#1; http://www.sc-conference.org/sc2003/paperpdfs/pap124.pdf", abstract = "We use 1944 processors of the Earth Simulator to model seismic wave propagation resulting from large earthquakes. Simulations are conducted based upon the spectral-element method, a high-degree finite-element technique with an exactly diagonal mass matrix. We use a very large mesh with 5.5 billion grid points (14.6 billion degrees of freedom). We include the full complexity of the Earth, i.e., a three-dimensional wave-speed and density structure, a 3-D crustal model, ellipticity as well as topography and bathymetry. A total of 2.5 terabytes of memory is needed. Our implementation is purely based upon MPI, with loop vectorization on each processor. We obtain an excellent vectorization ratio of 99.3\%, and we reach a performance of 5 teraflops (30\% of the peak performance) on 38\% of the machine. The very high resolution of the mesh allows us to perform fully three-dimensional calculations at seismic periods as low as 5 seconds.", acknowledgement = ack-nhfb, } @Article{Kranzlmuller:2003:RAP, author = "Dieter Kranzlm{\"u}ller and Peter Kacsuk and Jack Dongarra and Jens Volkert", title = "Recent Advances in Parallel Virtual Machine and Message Passing Interface (Select papers from the {EuroPVMMPI 2002 Conference})", journal = j-IJHPCA, volume = "17", number = "1", pages = "3--5", month = "Spring", year = "2003", CODEN = "IHPCFL", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Fri Nov 28 06:52:13 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, journal-URL = "http://hpc.sagepub.com/content/by/year", } @InProceedings{Li:2003:PNH, author = "Jianwei Li and Wei-keng Liao and Alok Choudhary and Robert Ross and Rajeev Thakur and William Gropp and Rob Latham and Andrew Siegel and Brad Gallagher and Michael Zingale", title = "{Parallel 
netCDF}: a High-Performance Scientific {I/O} Interface", crossref = "ACM:2003:SII", pages = "??--??", year = "2003", bibdate = "Wed Nov 26 07:34:20 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sc-conference.org/sc2003/inter_cal/inter_cal_detail.php?eventid=10722#1; http://www.sc-conference.org/sc2003/paperpdfs/pap258.pdf", abstract = "Dataset storage, exchange, and access play a critical role in scientific applications. For such purposes netCDF serves as a portable, efficient file format and programming interface, which is popular in numerous scientific application domains. However, the original interface does not provide an efficient mechanism for parallel data storage and access. In this work, we present a new parallel interface for writing and reading netCDF datasets. This interface is derived with minimal changes from the serial netCDF interface but defines semantics for parallel access and is tailored for high performance. The underlying parallel I/O is achieved through MPI-IO, allowing for substantial performance gains through the use of collective I/O optimizations. We compare the implementation strategies and performance with HDF5. Our tests indicate programming convenience and significant I/O performance improvement with this parallel netCDF (PnetCDF) interface.", acknowledgement = ack-nhfb, } @InProceedings{Liu:2003:PCM, author = "Jiuxing Liu and Balasubramanian Chandrasekaran and Jiesheng Wu and Weihang Jiang and Sushmitha Kini and Weikuan Yu and Darius Buntinas and Pete Wyckoff and D. K. 
Panda", title = "Performance Comparison of {MPI} Implementations over {InfiniBand}, {Myrinet} and {Quadrics}", crossref = "ACM:2003:SII", pages = "??--??", year = "2003", bibdate = "Wed Nov 26 07:34:20 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sc-conference.org/sc2003/inter_cal/inter_cal_detail.php?eventid=10696#0; http://www.sc-conference.org/sc2003/paperpdfs/pap310.pdf", abstract = "In this paper, we present a comprehensive performance comparison of MPI implementations over InfiniBand, Myrinet and Quadrics. Our performance evaluation consists of two major parts. The first part consists of a set of MPI level micro-benchmarks that characterize different aspects of MPI implementations. The second part of the performance evaluation consists of application level benchmarks. We have used the NAS Parallel Benchmarks and the sweep3D benchmark. We not only present the overall performance results, but also relate application communication characteristics to the information we acquired from the micro-benchmarks. Our results show that the three MPI implementations all have their advantages and disadvantages. For our 8-node cluster, InfiniBand can offer significant performance improvements for a number of applications compared with Myrinet and Quadrics when using the PCI-X bus. Even with just the PCI bus, InfiniBand can still perform better if the applications are bandwidth-bound.", acknowledgement = ack-nhfb, } @Article{Luecke:2003:CPM, author = "Glenn R. 
Luecke and Marina Kraeva and Lili Ju", title = "Comparing the performance of {MPICH} with {Cray}'s {MPI} and with {SGI}'s {MPI}", journal = j-CCPE, volume = "15", number = "9", pages = "779--802", day = "10", month = aug, year = "2003", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.719", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Tue Jan 13 09:28:12 MST 2004", bibsource = "http://www.interscience.wiley.com/jpages/1532-0626; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www3.interscience.wiley.com/journalfinder.html", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Prac\-tice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "14 Jul 2003", } @Article{Luecke:2003:MCT, author = "Glenn Luecke and Hua Chen and James Coyle and Jim Hoekstra and Marina Kraeva and Yan Zou", title = "{MPI-CHECK}: a tool for checking {Fortran 90 MPI} programs", journal = j-CCPE, volume = "15", number = "2", pages = "93--100", month = feb, year = "2003", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.705", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Tue Jan 13 09:28:06 MST 2004", bibsource = "http://www.interscience.wiley.com/jpages/1532-0626; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www3.interscience.wiley.com/journalfinder.html", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Prac\-tice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "6 Jan 2003", } @Article{Marowka:2003:EOT, author = "Ami Marowka", title = "Extending {OpenMP} for Task Parallelism", journal = j-PARALLEL-PROCESS-LETT, volume = "13", number = "3", pages = "341--??", month = sep, year = "2003", CODEN = "PPLTEE", ISSN = "0129-6264 (print), 1793-642X (electronic)", bibdate = "Sat Nov 6 18:06:31 MST 2004", bibsource = "http://ejournals.wspc.com.sg/ppl/; 
https://www.math.utah.edu/pub/tex/bib/parallelprocesslett.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Parallel Processing Letters", journal-URL = "http://www.worldscientific.com/loi/ppl", } @Article{Mattson:2003:HGO, author = "Timothy G. Mattson", title = "How good is {OpenMP}?", journal = j-SCI-PROG, volume = "11", number = "2", pages = "81--93", year = "2003", CODEN = "SCIPEV", ISSN = "1058-9244 (print), 1875-919X (electronic)", ISSN-L = "1058-9244", bibdate = "Mon Jan 12 06:28:15 MST 2004", bibsource = "http://www.iospress.nl/site/html/10589244.html; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sciprogram.bib", acknowledgement = ack-nhfb, fjournal = "Scientific Programming", journal-URL = "http://iospress.metapress.com/content/1058-9244", } @Article{Michailidis:2003:PEL, author = "Panagiotis D. Michailidis and Konstantinos G. Margaritis", title = "Performance evaluation of load balancing strategies for approximate string matching application on an {MPI} cluster of heterogeneous workstations", journal = j-FUT-GEN-COMP-SYS, volume = "19", number = "7", pages = "1075--1104", month = oct, year = "2003", CODEN = "FGSEVI", ISSN = "0167-739X (print), 1872-7115 (electronic)", ISSN-L = "0167-739X", bibdate = "Sat Jan 10 10:03:37 MST 2004", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Future Generation Computer Systems", journal-URL = "http://www.sciencedirect.com/science/journal/0167739X", remark = "Selected papers on Theoretical and Computational Aspects of Structural Dynamical Systems in Linear Algebra and Control.", } @Article{Min:2003:OOP, author = "Seung-Jai Min and Ayon Basumallik and Rudolf Eigenmann", title = "Optimizing {OpenMP} Programs on Software Distributed Shared Memory Systems", journal = j-INT-J-PARALLEL-PROG, volume = "31", number = "3", pages = 
"225--249", month = jun, year = "2003", CODEN = "IJPPE5", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Sat Jan 24 14:51:21 MST 2004", bibsource = "http://www.kluweronline.com/issn/0885-7458; https://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "/ips/frames/Refs/referenceskapmain.asp?J=4773&I=33&A=5&LK=NM; http://ipsapp007.kluweronline.com/content/getfile/4773/33/5/abstract.htm; http://ipsapp007.kluweronline.com/content/getfile/4773/33/5/fulltext.pdf", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", } @InProceedings{Moody:2003:SNB, author = "Adam Moody and Juan Fernandez and Fabrizio Petrini and Dhabaleswar K. Panda", title = "Scalable {NIC}-based Reduction on Large-Scale Clusters", crossref = "ACM:2003:SII", pages = "??--??", year = "2003", bibdate = "Wed Nov 26 07:34:20 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sc-conference.org/sc2003/inter_cal/inter_cal_detail.php?eventid=10716#2; http://www.sc-conference.org/sc2003/paperpdfs/pap316.pdf", abstract = "Many parallel algorithms require efficient reduction collectives. In response, researchers have designed algorithms considering a range of parameters including data size, system size, and communication characteristics. Throughout this past work, however, processing was limited to the host CPU. Today, modern Network Interface Cards (NICs) sport programmable processors with substantial memory, and thus introduce a fresh variable into the equation. In this paper, we investigate this new option in the context of large-scale clusters. Through experiments on the 960-node, 1920-processor ASCI Linux Cluster (ALC) at Lawrence Livermore National Laboratory, we show that NIC-based reductions outperform host-based algorithms in terms of reduced latency and increased consistency. 
In particular, in the largest configuration tested --- 1812 processors --- our NIC-based algorithm summed single-element vectors of 32-bit integers and 64-bit floating-point numbers in 73 $ \mu $ s and 118 $ \mu $ s, respectively. These results represent respective improvements of 121\% and 39\% over the production-level MPI library.", acknowledgement = ack-nhfb, } @Article{Muller:2003:OCB, author = "Matthias S. M{\"u}ller", title = "An {OpenMP} compiler benchmark", journal = j-SCI-PROG, volume = "11", number = "2", pages = "125--131", year = "2003", CODEN = "SCIPEV", ISSN = "1058-9244 (print), 1875-919X (electronic)", ISSN-L = "1058-9244", bibdate = "Mon Jan 12 06:28:15 MST 2004", bibsource = "http://www.iospress.nl/site/html/10589244.html; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sciprogram.bib", acknowledgement = ack-nhfb, fjournal = "Scientific Programming", journal-URL = "http://iospress.metapress.com/content/1058-9244", } @InProceedings{Nakajima:2003:PIS, author = "Kengo Nakajima", title = "Parallel Iterative Solvers of {GeoFEM} with Selective Blocking Preconditioning for Nonlinear Contact Problems on the {Earth Simulator}", crossref = "ACM:2003:SII", pages = "??--??", year = "2003", bibdate = "Wed Nov 26 07:34:20 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/supercomputing2003.bib", URL = "http://www.sc-conference.org/sc2003/inter_cal/inter_cal_detail.php?eventid=10703#1; http://www.sc-conference.org/sc2003/paperpdfs/pap155.pdf", abstract = "An efficient parallel iterative method with selective blocking preconditioning has been developed for symmetric multiprocessor (SMP) cluster architectures with vector processors such as the Earth Simulator. 
This method is based on a three-level hybrid parallel programming model, which includes message passing for inter-SMP node communication, loop directives by OpenMP for intra-SMP node parallelization and vectorization for each processing element (PE). This method provides robust and smooth convergence and excellent vector and parallel performance in 3D geophysical simulations with contact conditions performed on the Earth Simulator. The selective blocking preconditioning is much more efficient than ILU(1) and ILU(2). Performance for the complicated Southwest Japan model with more than 23 M DOF on 10 SMP nodes (80 PEs) of the Earth Simulator was 161.7 GFLOPS, corresponding to 25.3\% of the peak performance for hybrid programming model, and 190.4 GFLOPS (29.8\% of the peak performance) for flat MPI, respectively.", acknowledgement = ack-nhfb, } @Article{Nakano:2003:SCG, author = "Hirofumi Nakano and Kazuhisa Ishizaka and Motoki Obata and Keiji Kimura and Hironori Kasahara", title = "Static Coarse Grain Task Scheduling with Cache Optimization Using {OpenMP}", journal = j-INT-J-PARALLEL-PROG, volume = "31", number = "3", pages = "211--223", month = jun, year = "2003", CODEN = "IJPPE5", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Sat Jan 24 14:51:21 MST 2004", bibsource = "http://www.kluweronline.com/issn/0885-7458; https://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "/ips/frames/Refs/referenceskapmain.asp?J=4773&I=33&A=4&LK=NM; http://ipsapp007.kluweronline.com/content/getfile/4773/33/4/abstract.htm; http://ipsapp007.kluweronline.com/content/getfile/4773/33/4/fulltext.pdf", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", } @Book{Quinn:2003:PPC, author = "Michael J. 
(Michael Jay) Quinn", title = "Parallel programming in {C} with {MPI} and {OpenMP}", publisher = pub-MCGRAW-HILL, address = pub-MCGRAW-HILL:adr, pages = "xiv + 529", year = "2003", ISBN = "0-07-123265-6, 0-07-282256-2", ISBN-13 = "978-0-07-123265-4, 978-0-07-282256-4", LCCN = "QA76.73.C15 Q55 2003; QA76.73 .C15 Q55 2003", bibdate = "Thu Jun 2 07:26:02 MDT 2005", bibsource = "clavis.ucalgary.ca:2200/UNICORN; https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "The era of practical parallel programming has arrived, marked by the popularity of the MPI and OpenMP software standards and the emergence of commodity clusters as the hardware platform of choice for an increasing number of organizations. This exciting new book, ``Parallel Programming in C with MPI and OpenMP'' addresses the needs of students and professionals who want to learn how to design, analyze, implement, and benchmark parallel programs in C using MPI and/or OpenMP. It introduces a rock-solid design methodology with coverage of the most important MPI functions and OpenMP directives. It also demonstrates, through a wide range of examples, how to develop parallel programs that will execute efficiently on today's parallel platforms.", acknowledgement = ack-nhfb, subject = "C (Computer program language); Parallel programming (Computer science)", tableofcontents = "Motivation and history \\ Parallel architectures \\ Parallel algorithm design \\ Message-passing programming \\ The sieve of Eratosthenes \\ Floyd's algorithm \\ Performance analysis \\ Matrix--vector multiplication \\ Document classification \\ Monte Carlo methods \\ Matrix multiplication \\ Solving linear systems \\ Finite difference methods \\ Sorting \\ The Fast Fourier Transform \\ Combinatorial search \\ Shared-memory programming \\ Combining MPI and OpenMP", } @Article{Reussner:2003:USD, author = "Ralf H.
Reussner", title = "Using {SKaMPI} for developing high-performance {MPI} programs with performance portability", journal = j-FUT-GEN-COMP-SYS, volume = "19", number = "5", pages = "749--759", month = jul, year = "2003", CODEN = "FGSEVI", ISSN = "0167-739X (print), 1872-7115 (electronic)", ISSN-L = "0167-739X", bibdate = "Sat Jan 10 10:03:34 MST 2004", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Future Generation Computer Systems", journal-URL = "http://www.sciencedirect.com/science/journal/0167739X", remark = "Tools for Program Development and Analysis. Best papers from two Technical Sessions, at ICCS2001, San Francisco, CA, USA, and ICCS2002, Amsterdam, The Netherlands.", } @Article{Saito:2003:LSP, author = "Hideki Saito and Greg Gaertner and Wesley Jones and Rudolf Eigenmann and Hidetoshi Iwashita and Ron Lieberman and Matthijs van Waveren and Brian Whitney", title = "Large System Performance of {SPEC OMP} Benchmark Suites", journal = j-INT-J-PARALLEL-PROG, volume = "31", number = "3", pages = "197--209", month = jun, year = "2003", CODEN = "IJPPE5", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Sat Jan 24 14:51:21 MST 2004", bibsource = "http://www.kluweronline.com/issn/0885-7458; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "/ips/frames/Refs/referenceskapmain.asp?J=4773&I=33&A=3&LK=NM; http://ipsapp007.kluweronline.com/content/getfile/4773/33/3/abstract.htm; http://ipsapp007.kluweronline.com/content/getfile/4773/33/3/fulltext.pdf", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", } @Article{Shires:2003:OPF, author = "Dale Shires and Ram Mohan", title = "Optimization and Performance of a {Fortran 90} {MPI}-Based Unstructured Code on Large-Scale Parallel Systems", journal = j-J-SUPERCOMPUTING, volume = "25", number = "2", pages = "131--141", month = jun, year = 
"2003", CODEN = "JOSUED", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Tue Dec 16 08:27:09 MST 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.wkap.nl/journalhome.htm/0920-8542", URL = "http://ipsapp009.kluweronline.com/content/getfile/5189/44/4/abstract.htm; http://ipsapp009.kluweronline.com/content/getfile/5189/44/4/fulltext.pdf", abstract = "The message-passing interface (MPI) has become the standard in achieving effective results when using the message passing paradigm of parallelization. Codes written using MPI are extremely portable and are applicable to both clusters and massively parallel computing platforms. Since MPI uses the single program, multiple data (SPMD) approach to parallelism, good performance requires careful tuning of the serial code as well as careful data and control flow analysis to limit communication. We discuss optimization strategies used and their degree of success to increase performance of an MPI-based unstructured finite element simulation code written in Fortran 90. We discuss performance results based on implementations using several modern massively parallel computing platforms including the SGI Origin 3800, IBM Nighthawk 2 SMP, and Cray T3E-1200.", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Article{Squyres:2003:CAL, author = "Jeffrey M. 
Squyres", title = "A component architecture for {LAM\slash MPI} (citation only)", journal = j-SIGPLAN, pages = "??--??", year = "2003", CODEN = "SINODQ", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Dec 22 16:52:42 MST 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "ACM SIG{\-}PLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", } @Article{Takahashi:2003:PEH, author = "Daisuke Takahashi and Mitsuhisa Sato and Taisuke Boku", title = "Performance Evaluation of the {Hitachi SR8000} Using {SPEC OMP2001} Benchmarks", journal = j-INT-J-PARALLEL-PROG, volume = "31", number = "3", pages = "185--196", month = jun, year = "2003", CODEN = "IJPPE5", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Sat Jan 24 14:51:21 MST 2004", bibsource = "http://www.kluweronline.com/issn/0885-7458; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "/ips/frames/Refs/referenceskapmain.asp?J=4773&I=33&A=2&LK=NM; http://ipsapp007.kluweronline.com/content/getfile/4773/33/2/abstract.htm; http://ipsapp007.kluweronline.com/content/getfile/4773/33/2/fulltext.pdf", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", } @InProceedings{Weatherly:2003:DMS, author = "D. Brent Weatherly and David K. 
Lowenthal and Mario Nakazawa and Franklin Lowenthal", title = "{Dyn-MPI}: Supporting {MPI} on Non Dedicated Clusters", crossref = "ACM:2003:SII", pages = "??--??", year = "2003", bibdate = "Wed Nov 26 07:34:20 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sc-conference.org/sc2003/inter_cal/inter_cal_detail.php?eventid=10708#1; http://www.sc-conference.org/sc2003/paperpdfs/pap126.pdf", abstract = "Distributing data is a fundamental problem in implementing efficient distributed-memory parallel programs. The problem becomes more difficult in environments where the participating nodes are not dedicated to a parallel application. We are investigating the data distribution problem in non dedicated environments in the context of explicit message-passing programs.\par To address this problem, we have designed and implemented an extension to MPI called Dynamic MPI (Dyn-MPI). The key component of Dyn-MPI is its run-time system, which efficiently and automatically redistributes data on the fly when there are changes in the application or the underlying environment. Dyn-MPI supports efficient memory allocation, precise measurement of system load and computation time, and node removal. 
Performance results show that programs that use Dyn-MPI execute efficiently in non dedicated environments, including up to almost a three-fold improvement compared to programs that do not redistribute data and a 25\% improvement over standard adaptive load balancing techniques.", acknowledgement = ack-nhfb, } @InProceedings{Worringen:2003:FPN, author = "Joachim Worringen and Jesper Larsson Tr{\"a}ff and Hubert Ritzdorf", title = "Fast Parallel Non-Contiguous File Access", crossref = "ACM:2003:SII", pages = "??--??", year = "2003", bibdate = "Wed Nov 26 07:34:20 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sc-conference.org/sc2003/inter_cal/inter_cal_detail.php?eventid=10722#0; http://www.sc-conference.org/sc2003/paperpdfs/pap319.pdf", abstract = "Many applications of parallel I/O perform non-contiguous file accesses: instead of accessing a single (large) block of data in a file, a number of (smaller) blocks of data scattered throughout the file needs to be accessed in each logical I/O operation. However, only few file system interfaces directly support this kind of non-contiguous file access. In contrast, the most commonly used parallel programming interface, MPI, incorporates a flexible model of parallel I/O through its MPI-IO interface. With MPI-IO, arbitrary non-contiguous file accesses are supported in a uniform fashion by the use of derived MPI datatypes set up by the user to reflect the desired I/O pattern.\par Despite a considerable amount of recent work in this area, current MPI-IO implementations suffer from low performance of such non-contiguous accesses when compared to the performance of the storage system for contiguous accesses. In this paper we analyze an important bottleneck in the efficient handling of non-contiguous access patterns in current implementations of MPI-IO.
We present a new technique, termed listless I/O, that can be incorporated into MPI-IO implementations like the well-known ROMIO implementation, and completely eliminates this bottleneck. We have implemented the technique in MPI/SX, the MPI implementation for the NEC SX-series of parallel vector computers. Results with a synthetic benchmark and an application kernel show that listless I/O is able to increase the bandwidth for non-contiguous file access by sometimes more than a factor of 500 when compared to the traditional approach.", acknowledgement = ack-nhfb, } @InProceedings{Ying:2003:NPK, author = "Lexing Ying and George Biros and Denis Zorin and Harper Langston", title = "A new parallel kernel-independent fast multipole method", crossref = "ACM:2003:SII", pages = "??--??", year = "2003", bibdate = "Wed Nov 26 07:34:20 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sc-conference.org/sc2003/inter_cal/inter_cal_detail.php?eventid=10707#2; http://www.sc-conference.org/sc2003/paperpdfs/pap166.pdf", abstract = "We present a new adaptive fast multipole algorithm and its parallel implementation. The algorithm is kernel-independent in the sense that the evaluation of pairwise interactions does not rely on any analytic expansions, but only utilizes kernel evaluations. The new method provides the enabling technology for many important problems in computational science and engineering. Examples include viscous flows, fracture mechanics and screened Coulombic interactions. Our MPI-based parallel implementation logically separates the computation and communication phases to avoid synchronization in the upward and downward computation passes, and thus allows us to fully exploit computation and communication overlapping. We measure isogranular and fixed-size scalability for a variety of kernels on the Pittsburgh Supercomputing Center's TCS-1 AlphaServer on up to 3000 processors. 
We have solved viscous flow problems with up to 2.1 billion unknowns and we have achieved 1.6 Tflops/s peak performance and 1.13 Tflops/s sustained performance.", acknowledgement = ack-nhfb, keywords = "adaptive algorithms; boundary integral equations; Fast multipole methods; massively parallel computing; N-body problems; viscous flows", } @Book{Bisseling:2004:PSC, author = "Rob H. Bisseling", title = "Parallel scientific computation: a structured approach using {BSP} and {MPI}", publisher = pub-OXFORD, address = pub-OXFORD:adr, pages = "xviii + 305", year = "2004", ISBN = "0-19-852939-2", ISBN-13 = "978-0-19-852939-2", LCCN = "QA76.58 .B57 2004", bibdate = "Tue Mar 13 14:00:12 MDT 2007", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; z3950.loc.gov:7090/Voyager", URL = "http://www.loc.gov/catdir/enhancements/fy0617/2004046141-d.html; http://www.loc.gov/catdir/enhancements/fy0617/2004046141-t.html", acknowledgement = ack-nhfb, subject = "Bulk Synchronous Parallel (BSP) model; Message Passing Interface (MPI); Parallel processing (Electronic computers); Scientific applications; Supercomputers; Parallel computers", } @Article{Boeres:2004:ETF, author = "Cristina Boeres and Vinod E. F. 
Rebello", title = "{EasyGrid}: towards a framework for the automatic {Grid} enabling of legacy {MPI} applications", journal = j-CCPE, volume = "16", number = "5", pages = "425--432", day = "25", month = apr, year = "2004", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.821", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Sat May 14 11:30:53 MDT 2005", bibsource = "http://www.interscience.wiley.com/jpages/1532-0626; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www3.interscience.wiley.com/journalfinder.html", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Prac\-tice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "26 Mar 2004", } @Article{Corbalan:2004:PMD, author = "Julita Corbalan and Xavier Martorell and Jesus Labarta", title = "Page Migration with Dynamic Space-Sharing Scheduling Policies: The Case of the {SGI O2000}", journal = j-INT-J-PARALLEL-PROG, volume = "32", number = "4", pages = "263--288", month = aug, year = "2004", CODEN = "IJPPE5", DOI = "https://doi.org/10.1023/B:IJPP.0000035815.13969.ec", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Wed Jul 9 16:05:14 MDT 2008", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=32&issue=4; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=32&issue=4&spage=263", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", keywords = "CC-NUMA; dynamic processor allocation policy; memory page migration; multiprogrammed workload; OpenMP", } @Article{Cotronis:2004:CMP, author = "Yiannis Cotronis", title = "Composition of {Message Passing Interface} Applications over {MPICH-G2}", journal = j-IJHPCA, volume = "18", number = "3", pages = "327--339", month =
"Fall", year = "2004", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342004046047", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Tue Aug 31 09:59:45 MDT 2010", bibsource = "http://hpc.sagepub.com/content/18/3.toc; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://hpc.sagepub.com/content/18/3/327.full.pdf+html", acknowledgement = ack-nhfb, journal-URL = "http://hpc.sagepub.com/content/by/year", } @Article{Fagg:2004:BUF, author = "Graham E. Fagg and Jack J. Dongarra", title = "Building and Using a Fault-Tolerant {MPI} Implementation", journal = j-IJHPCA, volume = "18", number = "3", pages = "353--361", month = "Fall", year = "2004", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342004046052", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Tue Aug 31 09:59:45 MDT 2010", bibsource = "http://hpc.sagepub.com/content/18/3.toc; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://hpc.sagepub.com/content/18/3/353.full.pdf+html", acknowledgement = ack-nhfb, journal-URL = "http://hpc.sagepub.com/content/by/year", } @Book{Fernando:2004:GGP, editor = "Randima Fernando", title = "{GPU} gems: programming techniques, tips, and tricks for real-time graphics", volume = "1", publisher = pub-AW, address = pub-AW:adr, pages = "xlv + 765", year = "2004", ISBN = "0-321-22832-4", ISBN-13 = "978-0-321-22832-1", LCCN = "T385 .G6879 2004", bibdate = "Thu Jul 29 13:36:54 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/unix.bib; z3950.loc.gov:7090/Voyager", price = "US\$45.99", series = "GPU gems", acknowledgement = ack-nhfb, keywords = "CUDA; nVIDIA", subject = "Computer graphics; Real-time programming", } @Article{Gropp:2004:FTM, author = "William Gropp and Ewing Lusk", title = "Fault Tolerance in {Message Passing Interface} Programs", journal = j-IJHPCA, volume = "18", number = "3", pages = "363--372", month = "Fall",
year = "2004", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342004046045", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Tue Aug 31 09:59:45 MDT 2010", bibsource = "http://hpc.sagepub.com/content/18/3.toc; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://hpc.sagepub.com/content/18/3/363.full.pdf+html", acknowledgement = ack-nhfb, journal-URL = "http://hpc.sagepub.com/content/by/year", } @Article{Iwasaki:2004:NPS, author = "Hideya Iwasaki and Zhenjiang Hu", title = "A New Parallel Skeleton for General Accumulative Computations", journal = j-INT-J-PARALLEL-PROG, volume = "32", number = "5", pages = "389--414", month = oct, year = "2004", CODEN = "IJPPE5", DOI = "https://doi.org/10.1023/B:IJPP.0000038069.80050.74", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Wed Jul 9 16:05:18 MDT 2008", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=32&issue=5; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=32&issue=5&spage=389", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", keywords = "Bird data parallel skeleton; Meertens formalism; MPI; program transformation; Skeletal parallel programming", } @InProceedings{Ke:2004:RCM, author = "Jian Ke and Martin Burtscher and Evan Speight", title = "Runtime Compression of {MPI} Messages to Improve the Performance and Scalability of Parallel Applications", crossref = "ACM:2004:SHP", pages = "59--59", year = "2004", bibdate = "Tue Dec 27 07:57:20 MST 2005", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Article{Kepner:2004:M, author = "Jeremy Kepner and Stan Ahalt", title = "{MatlabMPI}", journal = j-J-PAR-DIST-COMP, volume = "64", number = "8", pages = "997--1005", month = aug, 
year = "2004", CODEN = "JPDCER", DOI = "https://doi.org/10.1016/j.jpdc.2004.03.018", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Sat Dec 4 15:15:10 MST 2004", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sciencedirect.com/science/journal/07437315", abstract = "In many projects the true costs of high performance computing are currently dominated by software. Addressing these costs may require shifting to higher level languages such as Matlab. MatlabMPI is a Matlab implementation of the Message Passing Interface (MPI) standard and allows any Matlab program to exploit multiple processors. MatlabMPI currently implements the basic six functions that are the core of the MPI point-to-point communications standard. The key technical innovation of MatlabMPI is that it implements the widely used MPI ``look and feel'' on top of standard Matlab file I/O, resulting in an extremely compact ($ \approx 350 $ lines of code) and ``pure'' implementation which runs anywhere Matlab runs, and on any heterogeneous combination of computers. The performance has been tested on both shared and distributed memory parallel computers (e.g. Sun, SGI, HP, IBM, Linux, MacOSX and Windows). MatlabMPI can match the bandwidth of C based MPI at large message sizes. A test image filtering application using MatlabMPI achieved a speedup of $ \approx 300 $ using 304 CPUs and $ \approx 15 \% $ of the theoretical peak (450 Gigaflops) on an IBM SP2 at the Maui High Performance Computing Center. In addition, this entire parallel benchmark application was implemented in 70 software-lines-of-code, illustrating the high productivity of this approach. 
MatlabMPI is available for download on the web (www.ll.mit.edu/MatlabMPI).", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Book{Ladd:2004:GPP, author = "Scott Ladd", title = "Guide to Parallel Programming", publisher = pub-SV, address = pub-SV:adr, pages = "465 (est.)", year = "2004", ISBN = "0-387-40577-1", ISBN-13 = "978-0-387-40577-3", LCCN = "????", bibdate = "Wed Aug 27 06:31:34 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "Includes CD-ROM.", acknowledgement = ack-nhfb, tableofcontents = "Introduction; Supercomputing; Tools for Parallel Programming; Introducing OpenMP; Parallel Loops with Open MP; Advanced OpenMP; Message passing with MPI; Deeper MPI; Design of data and algorithms; Optimization; Debugging the hydra; Parallel in parallel--MPI and OpenMP together; Elaborations; Resources; Index", } @InProceedings{Liu:2004:BMI, author = "Jiuxing Liu and Abhinav Vishnu and Dhabaleswar K. Panda", title = "Building Multirail {InfiniBand} Clusters: {MPI}-Level Design and Performance Evaluation", crossref = "ACM:2004:SHP", pages = "33--33", year = "2004", bibdate = "Tue Dec 27 07:57:20 MST 2005", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Article{Liu:2004:HPR, author = "Jiuxing Liu and Jiesheng Wu and Dhabaleswar K. 
Panda", title = "High Performance {RDMA}-Based {MPI} Implementation over {InfiniBand}", journal = j-INT-J-PARALLEL-PROG, volume = "32", number = "3", pages = "167--198", month = jun, year = "2004", CODEN = "IJPPE5", DOI = "https://doi.org/10.1023/B:IJPP.0000029272.69895.c1", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Wed Jul 6 16:40:03 MDT 2005", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=32&issue=3; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=32&issue=3&spage=167", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", } @InProceedings{Lu:2004:AFS, author = "Charng-da Lu and Daniel A. Reed", title = "Assessing Fault Sensitivity in {MPI} Applications", crossref = "ACM:2004:SHP", pages = "37--37", year = "2004", bibdate = "Tue Dec 27 07:57:20 MST 2005", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Article{Luecke:2004:PSM, author = "Glenn R. Luecke and Marina Kraeva and Jing Yuan and Silvia Spanoyannis", title = "Performance and scalability of {MPI} on {PC} clusters", journal = j-CCPE, volume = "16", number = "1", pages = "79--107", month = jan, year = "2004", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.749", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Tue Jan 13 09:28:19 MST 2004", bibsource = "http://www.interscience.wiley.com/jpages/1532-0626; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www3.interscience.wiley.com/journalfinder.html", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Prac\-tice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "4 Dec 2003", } @Article{Luecke:2004:PSS, author = "Glenn R. 
Luecke and Silvia Spanoyannis and Marina Kraeva", title = "The performance and scalability of {SHMEM} and {MPI-2} one-sided routines on a {SGI Origin 2000} and a {Cray T3E-600}", journal = j-CCPE, volume = "16", number = "10", pages = "1037--1060", day = "25", month = aug, year = "2004", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.796", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Sat May 14 11:30:55 MDT 2005", bibsource = "http://www.interscience.wiley.com/jpages/1532-0626; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www3.interscience.wiley.com/journalfinder.html", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Prac\-tice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "10 Jun 2004", } @Article{Marowka:2004:OOA, author = "Ami Marowka and Zhenying Liu and Barbara Chapman", title = "{OpenMP-oriented} applications for distributed shared memory architectures", journal = j-CCPE, volume = "16", number = "4", pages = "371--384", day = "10", month = apr, year = "2004", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.752", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Sat May 14 11:30:53 MDT 2005", bibsource = "http://www.interscience.wiley.com/jpages/1532-0626; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www3.interscience.wiley.com/journalfinder.html", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Prac\-tice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "19 Jan 2004", } @Article{Martin:2004:HPA, author = "Mar{\'\i}a J. 
Mart{\'\i}n and Marta Parada and Ram{\'o}n Doallo", title = "High Performance Air Pollution Simulation Using {OpenMP}", journal = j-J-SUPERCOMPUTING, volume = "28", number = "3", pages = "311--321", month = jun, year = "2004", CODEN = "JOSUED", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Sat Dec 4 12:39:13 MST 2004", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.wkap.nl/journalhome.htm/0920-8542", URL = "http://ipsapp008.kluweronline.com/IPS/content/ext/x/J/5189/I/54/A/5/abstract.htm", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Book{Mertens:2004:CCP, author = "Stephan Mertens and Alexander Schinner", title = "{Cluster Computing: Praktische Einf{\"u}hrung in das wissenschaftliche Rechnen auf Workstation-Clustern}", publisher = pub-SV, address = pub-SV:adr, pages = "300 (est.)", year = "2004", ISBN = "3-540-42299-4", ISBN-13 = "978-3-540-42299-0", LCCN = "????", bibdate = "Wed Aug 27 06:33:33 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "Includes CD-ROM.", acknowledgement = ack-nhfb, } @InProceedings{Mohror:2004:PTS, author = "Kathryn Mohror and Karen L. Karavanic", title = "Performance Tool Support for {MPI-2} on {Linux}", crossref = "ACM:2004:SHP", pages = "28--28", year = "2004", bibdate = "Tue Dec 27 07:57:20 MST 2005", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Article{Park:2004:DID, author = "K.-L. Park and H.-J. Lee and O.-Y. Kwon and S.-Y. Park and H.-W. Park and S.-D. 
Kim", title = "Design and Implementation of a Dynamic Communication {MPI} Library for the Grid", journal = j-INT-J-COMPUT-APPL, volume = "26", number = "3", pages = "1--8", year = "2004", DOI = "https://doi.org/10.1080/1206212X.2004.11441738", ISSN = "1206-212X (print), 1925-7074 (electronic)", ISSN-L = "1206-212X", bibdate = "Sat Apr 21 17:21:44 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/ijca.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://www.tandfonline.com/doi/full/10.1080/1206212X.2004.11441738", acknowledgement = ack-nhfb, fjournal = "International Journal of Computers and Applications", journal-URL = "https://www.tandfonline.com/loi/tjca20", online-date = "11 Jul 2015", } @InProceedings{Schulz:2004:IES, author = "Martin Schulz and Greg Bronevetsky and Rohit Fernandes and Daniel Marques and Keshav Pingali and Paul Stodghill", title = "Implementation and Evaluation of a Scalable Application-Level Checkpoint-Recovery Scheme for {MPI} Programs", crossref = "ACM:2004:SHP", pages = "38--38", year = "2004", bibdate = "Tue Dec 27 07:57:20 MST 2005", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Article{Sievert:2004:SMP, author = "Otto Sievert and Henri Casanova", title = "A Simple {MPI} Process Swapping Architecture for Iterative Applications", journal = j-IJHPCA, volume = "18", number = "3", pages = "341--352", month = "Fall", year = "2004", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342004047430", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Tue Aug 31 09:59:45 MDT 2010", bibsource = "http://hpc.sagepub.com/content/18/3.toc; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://hpc.sagepub.com/content/18/3/341.full.pdf+html", acknowledgement = ack-nhfb, fjournal = "International Journal of High Performance Computing Applications", journal-URL = "http://hpc.sagepub.com/content/by/year", } @Article{Skjellum:2004:RTM, author = "Anthony Skjellum and Arkady Kanevsky and Yoginder S. 
Dandass and Jerrell Watts and Steve Paavola and Dennis Cottel and Greg Henley and L. Shane Hebert and Zhenqian Cui and Anna Rounbehler and {The Real-Time Message Passing Interface (MPI\slash RT) Forum}", title = "The {Real-Time Message Passing Interface Standard (MPI\slash RT-1.1)}", journal = j-CCPE, volume = "16", number = "S1", pages = "Si--S322", day = "25", month = dec, year = "2004", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.744", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Sat May 14 11:30:56 MDT 2005", bibsource = "http://www.interscience.wiley.com/jpages/1532-0626; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www3.interscience.wiley.com/journalfinder.html", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Prac\-tice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "22 Nov 2004", } @Article{Smith:2004:SIP, author = "Kevin B. Smith and Aart J. C. Bik and Xinmin Tian", title = "Support for the {Intel{\reg} Pentium{\reg} 4} Processor with Hyper-Threading Technology in {Intel{\reg}} 8.0 Compilers", journal = j-INTEL-TECH-J, volume = "8", number = "1", pages = "19--31", month = feb, year = "2004", ISSN = "1535-766X", bibdate = "Mon Jul 11 08:46:53 2005", bibsource = "http://developer.intel.com/technology/itj/archive/2004.htm; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://developer.intel.com/technology/itj/2004/volume08issue01/art02_compilers/p01_abstract.htm", acknowledgement = ack-nhfb, keywords = "Compilers; Hyper-Threading Technology; Intel Pentium 4 processor; OpenMP; Optimization; Vectorization", } @Article{Vrenios:2004:PPC, author = "A. 
Vrenios", title = "{Parallel Programming in C with MPI and OpenMP} [Book Review]", journal = j-IEEE-DISTRIB-SYST-ONLINE, volume = "5", number = "1", pages = "7.1--7.3", month = "????", year = "2004", CODEN = "????", ISSN = "1541-4922 (print), 1558-1683 (electronic)", ISSN-L = "1541-4922", bibdate = "Fri Jul 15 17:50:13 MDT 2005", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://ieeexplore.ieee.org/iel5/8968/28452/01270716.pdf?isnumber=28452&prod=JNL&arnumber=1270716&arSt=+7.1&ared=+7.3&arAuthor=Vrenios%2C+A.; http://ieeexplore.ieee.org/xpls/abs_all.jsp?isnumber=28452&arnumber=1270716&count=8&index=5", acknowledgement = ack-nhfb, fjournal = "IEEE Distributed Systems Online", } @Book{White:2004:CMM, author = "R. E. (Robert E.) White", title = "Computational Mathematics: Models, Methods, and Analysis with {MATLAB} and {MPI}", publisher = pub-CHAPMAN-HALL-CRC, address = pub-CHAPMAN-HALL-CRC:adr, pages = "xvi + 385", year = "2004", ISBN = "1-58488-364-2", ISBN-13 = "978-1-58488-364-7", LCCN = "QA297 .W495 2004", bibdate = "Tue Apr 26 09:31:54 MDT 2005", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; z3950.loc.gov:7090/Voyager", acknowledgement = ack-nhfb, subject = "Numerical analysis; MATLAB; Computer interfaces; Parallel programming (Computer science)", } @Article{Zeyao:2004:AMI, author = "Mo Zeyao and Huang Zhengfeng", title = "Application of {MPI-IO} in Parallel Particle Transport {Monte--Carlo} Simulation", journal = j-PARALLEL-ALGORITHMS-APPL, volume = "19", number = "4", pages = "227--236", month = "????", year = "2004", CODEN = "PAAPEC", DOI = "https://doi.org/10.1080/10637190412331295166", ISSN = "1063-7192", ISSN-L = "1026-7689", bibdate = "Thu Jul 10 21:46:37 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.informaworld.com/smpp/content~content=a714592658", acknowledgement = ack-nhfb, fjournal = "Parallel Algorithms and Applications", journal-URL = 
"http://www.tandfonline.com/loi/gpaa20", } @Article{Zhang:2004:PMV, author = "Xin Zhang and Lingli Ding and Elke A. Rundensteiner", title = "Parallel multisource view maintenance", journal = j-VLDB-J, volume = "13", number = "1", pages = "22--48", month = jan, year = "2004", CODEN = "VLDBFR", DOI = "https://doi.org/10.1007/s00778-003-0086-0", ISSN = "1066-8888 (print), 0949-877X (electronic)", ISSN-L = "1066-8888", bibdate = "Mon Jun 23 10:51:09 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "In a distributed environment, materialized views are used to integrate data from different information sources and then store them in some centralized location. In order to maintain such materialized views, maintenance queries need to be sent to information sources by the data warehouse management system. Due to the independence of the information sources and the data warehouse, concurrency issues are raised between the maintenance queries and the local update transactions at each information source. Recent solutions such as ECA and Strobe tackle such concurrent maintenance, however with the requirement of quiescence of the information sources. SWEEP and POSSE overcome this limitation by decomposing the global maintenance query into smaller subqueries to be sent to every information source and then performing conflict correction locally at the data warehouse. Note that all these previous approaches handle the data updates {\em one at a time}. Hence either some of the information sources or the data warehouse is likely to be idle during most of the maintenance process. In this paper, we propose that a set of updates should be maintained in parallel by several concurrent maintenance processes so that both the information sources as well as the warehouse would be utilized more fully throughout the maintenance process. This parallelism should then improve the overall maintenance performance. 
For this we have developed a parallel view maintenance algorithm, called PVM, that substantially improves upon the performance of previous maintenance approaches by handling a set of data updates at the same time. The parallel handling of a set of updates is orthogonal to the particular maintenance algorithm applied to the handling of each individual update. In order to perform parallel view maintenance, we have identified two critical issues that must be overcome: (1) detecting maintenance-concurrent data updates in a parallel mode and (2) correcting the problem that the data warehouse commit order may not correspond to the data warehouse update processing order due to parallel maintenance handling. In this work, we provide solutions to both issues. For the former, we insert a middle-layer timestamp assignment module for detecting maintenance-concurrent data updates without requiring any global clock synchronization. For the latter, we introduce the negative counter concept to solve the problem of variant orders of committing effects of data updates to the data warehouse. We provide a proof of the correctness of PVM that guarantees that our strategy indeed generates the correct final data warehouse state. We have implemented both SWEEP and PVM in our EVE data warehousing system. Our performance study demonstrates that a manyfold performance improvement is achieved by PVM over SWEEP.", acknowledgement = ack-nhfb, fjournal = "VLDB Journal: Very Large Data Bases", journal-URL = "http://portal.acm.org/toc.cfm?id=J869", keywords = "concurrent data updates; data warehousing; parallel view maintenance; performance evaluation", } @Article{Almasi:2005:DIM, author = "G. Alm{\'a}si and C. Archer and J. G. Casta{\~n}os and J. A. Gunnels and C. C. Erway and P. Heidelberger and X. Martorell and J. E. Moreira and K. Pinnow and J. Ratterman and B. D. Steinmacher-Burow and W. Gropp and B. 
Toonen", title = "Design and implementation of message-passing services for the {Blue Gene/L} supercomputer", journal = j-IBM-JRD, volume = "49", number = "2/3", pages = "393--406", month = "????", year = "2005", CODEN = "IBMJAE", ISSN = "0018-8646 (print), 2151-8556 (electronic)", ISSN-L = "0018-8646", bibdate = "Wed Jun 1 08:14:41 MDT 2005", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.research.ibm.com/journal/", URL = "http://www.research.ibm.com/journal/rd/492/almasi.pdf", abstract = "The Blue Gene/L (BG/L) supercomputer, with 65,536 dual-processor compute nodes, was designed from the ground up to support efficient execution of massively parallel message-passing programs. Part of this support is an optimized implementation of the Message Passing Interface (MPI), which leverages the hardware features of BG/L. MPI for BG/L is implemented on top of a more basic message-passing infrastructure called the message layer. This message layer can be used both to implement other higher-level libraries and directly by applications. MPI and the message layer are used in the two BG/L modes of operation: the coprocessor mode and the virtual node mode. Performance measurements show that our message-passing services deliver performance close to the hardware limits of the machine. 
They also show that dedicating one of the processors of a node to communication functions (coprocessor mode) greatly improves the message-passing bandwidth, whereas running two processes per compute node (virtual node mode) can have a positive impact on application performance.", acknowledgement = ack-nhfb, fjournal = "IBM Journal of Research and Development", journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5288520", ordernumber = "G322-0240", } @Article{Aversa:2005:HDS, author = "Rocco Aversa and Beniamino {Di Martino} and Nicola Mazzocca and Salvatore Venticinque", title = "A hierarchical distributed-shared memory parallel {Branch \& Bound} application with {PVM} and {OpenMP} for multiprocessor clusters", journal = j-PARALLEL-COMPUTING, volume = "31", number = "10--12", pages = "1034--1047", month = oct # "\slash " # dec, year = "2005", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Thu Sep 2 17:51:04 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Aversa:2005:PPT, author = "Rocco Aversa and Beniamino {Di Martino} and Massimiliano Rak and Salvatore Venticinque and Umberto Villano", title = "Performance prediction through simulation of a hybrid {MPI\slash OpenMP} application", journal = j-PARALLEL-COMPUTING, volume = "31", number = "10--12", pages = "1013--1033", month = oct # "\slash " # dec, year = "2005", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Thu Sep 2 17:51:04 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Bernaschi:2005:ERA, author = "Massimo Bernaschi and Giulio Iannello and Saverio 
Crea", title = "Experimental Results About {MPI} Collective Communication Operations", journal = j-PARALLEL-PROCESS-LETT, volume = "15", number = "1/2", pages = "223--236", month = mar # "\slash " # jun, year = "2005", CODEN = "PPLTEE", DOI = "https://doi.org/10.1142/S0129626405002179", ISSN = "0129-6264 (print), 1793-642X (electronic)", bibdate = "Thu Sep 2 09:08:11 MDT 2010", bibsource = "http://ejournals.wspc.com.sg/ppl/; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Parallel Processing Letters", journal-URL = "http://www.worldscientific.com/loi/ppl", } @Article{Bhanot:2005:OTL, author = "G. Bhanot and A. Gara and P. Heidelberger and E. Lawless and J. C. Sexton and R. Walkup", title = "Optimizing task layout on the {Blue Gene/L} supercomputer", journal = j-IBM-JRD, volume = "49", number = "2/3", pages = "489--500", month = "????", year = "2005", CODEN = "IBMJAE", ISSN = "0018-8646 (print), 2151-8556 (electronic)", ISSN-L = "0018-8646", bibdate = "Wed Jun 1 08:14:41 MDT 2005", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.research.ibm.com/journal/", URL = "http://www.research.ibm.com/journal/rd/492/bhanot.pdf", abstract = "A general method for optimizing problem layout on the Blue Gene/L (BG/L) supercomputer is described. The method takes as input the communication matrix of an arbitrary problem as an array with entries $ C(i, j) $, which represents the data communicated from domain $i$ to domain $j$. Given $ C(i, j) $, we implement a heuristic map that attempts to sequentially map a domain and its communication neighbors either to the same BG/L node or to near-neighbor nodes on the BG/L torus, while keeping the number of domains mapped to a BG/L node constant. We then generate a Markov chain of maps using Monte Carlo simulation with free energy $ F = \sum_{i, j} C(i, j)H(i, j) $, where $ H(i, j) $ is the smallest number of hops on the BG/L torus between domain $i$ and domain $j$. 
For two large parallel applications, SAGE and UMT2000, the method was tested against the default Message Passing Interface rank order layout on up to 2,048 BG/L nodes. It produced maps that improved communication efficiency by up to 45\%.", acknowledgement = ack-nhfb, fjournal = "IBM Journal of Research and Development", journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5288520", ordernumber = "G322-0240", } @Article{Blikberg:2005:LBO, author = "R. Blikberg and T. S{\o}revik", title = "Load balancing and {OpenMP} implementation of nested parallelism", journal = j-PARALLEL-COMPUTING, volume = "31", number = "10--12", pages = "984--998", month = oct # "\slash " # dec, year = "2005", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Thu Sep 2 17:51:04 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Brightwell:2005:AIO, author = "Ron Brightwell and Rolf Riesen and Keith D. 
Underwood", title = "Analyzing the Impact of Overlap, Offload, and Independent Progress for {Message Passing Interface} Applications", journal = j-IJHPCA, volume = "19", number = "2", pages = "103--117", month = "Summer", year = "2005", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342005054257", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Tue Aug 31 09:59:45 MDT 2010", bibsource = "http://hpc.sagepub.com/content/19/2.toc; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://hpc.sagepub.com/content/19/2/103.full.pdf+html", acknowledgement = ack-nhfb, fjournal = "International Journal of High Performance Computing Applications", journal-URL = "http://hpc.sagepub.com/content/by/year", } @Article{Chan:2005:CCI, author = "Albert Chan and Frank Dehne and Ryan Taylor", title = "{CGMGRAPH\slash CGMLIB}: Implementing and Testing {CGM} Graph Algorithms on {PC} Clusters and Shared Memory Machines", journal = j-IJHPCA, volume = "19", number = "1", pages = "81--97", month = "Spring", year = "2005", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342005051196", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Tue Aug 31 09:59:45 MDT 2010", bibsource = "http://hpc.sagepub.com/content/19/1.toc; https://www.math.utah.edu/pub/tex/bib/ijsa.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://hpc.sagepub.com/content/19/1/81.full.pdf+html", acknowledgement = ack-nhfb, fjournal = "International Journal of High Performance Computing Applications", journal-URL = "https://journals.sagepub.com/home/hpc", } @Article{Chapman:2005:O, author = "Barbara M. 
Chapman and Federico Massaioli", title = "{OpenMP}", journal = j-PARALLEL-COMPUTING, volume = "31", number = "10--12", pages = "957--959", month = oct # "\slash " # dec, year = "2005", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Thu Sep 2 17:51:04 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Dalcin:2005:MP, author = "Lisandro Dalc{\'\i}n and Rodrigo Paz and Mario Storti", title = "{MPI} for {Python}", journal = j-J-PAR-DIST-COMP, volume = "65", number = "9", pages = "1108--1115", month = sep, year = "2005", CODEN = "JPDCER", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Fri Jul 11 20:32:33 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sciencedirect.com/science/journal/07437315", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Article{Duran:2005:RAP, author = "A. Duran and R. Silvera and J. Corbalan and J. Labarta", booktitle = "Shared Memory Parallel Programming with {OpenMP}", title = "Runtime Adjustment of Parallel Nested Loops", journal = j-LECT-NOTES-COMP-SCI, volume = "3349", pages = "137--??", year = "2005", bibdate = "Mon Oct 07 09:29:01 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Article{Eleftheriou:2005:SFF, author = "M. Eleftheriou and B. G. Fitch and A. Rayshubskiy and T. J. C. Ward and R. S. 
Germain", title = "Scalable framework for {$3$D} {FFTs} on the {Blue Gene/L} supercomputer: Implementation and early performance measurements", journal = j-IBM-JRD, volume = "49", number = "2/3", pages = "457--464", month = "????", year = "2005", CODEN = "IBMJAE", ISSN = "0018-8646 (print), 2151-8556 (electronic)", ISSN-L = "0018-8646", bibdate = "Wed Jun 1 08:14:41 MDT 2005", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.research.ibm.com/journal/", URL = "http://www.research.ibm.com/journal/rd/492/eleftheriou.pdf", abstract = "This paper presents results on a communications-intensive kernel, the three-dimensional fast Fourier transform (3D FFT), running on the 2,048-node Blue Gene/L (BG/L) prototype. Two implementations of the volumetric FFT algorithm were characterized, one built on the Message Passing Interface library and another built on an active packet Application Program Interface supported by the hardware bring-up environment, the BG/L advanced diagnostics environment. Preliminary performance experiments on the BG/L prototype indicate that both of our implementations scale well up to 1,024 nodes for $3$D FFTs of size $ 128 \times 128 \times 128 $. The performance of the volumetric FFT is also compared with that of the Fastest Fourier Transform in the West (FFTW) library. In general, the volumetric FFT outperforms a port of the FFTW Version 2.1.5 library on large-node-count partitions.", acknowledgement = ack-nhfb, fjournal = "IBM Journal of Research and Development", journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5288520", ordernumber = "G322-0240", } @Article{Florez:2005:LMM, author = "German Florez and Zhen Liu and Susan M. Bridges and Anthony Skjellum and Rayford B. 
Vaughn", title = "Lightweight monitoring of {MPI} programs in real time", journal = j-CCPE, volume = "17", number = "13", pages = "1547--1578", month = nov, year = "2005", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.889", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Tue Oct 4 06:07:02 MDT 2005", bibsource = "http://www.interscience.wiley.com/jpages/1532-0626; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www3.interscience.wiley.com/journalfinder.html", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Prac\-tice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "24 Jun 2005", } @Article{Floros:2005:TGS, author = "Evangelos Floros and Yiannis Cotronis", title = "Towards a {Grid} Services Based Framework for the Virtualization, Execution and Composition of {MPI} Applications", journal = j-PARALLEL-PROCESS-LETT, volume = "15", number = "1/2", pages = "85--98", month = mar # "\slash " # jun, year = "2005", CODEN = "PPLTEE", DOI = "https://doi.org/10.1142/S0129626405002076", ISSN = "0129-6264 (print), 1793-642X (electronic)", bibdate = "Thu Sep 2 09:08:11 MDT 2010", bibsource = "http://ejournals.wspc.com.sg/ppl/; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Parallel Processing Letters", journal-URL = "http://www.worldscientific.com/loi/ppl", } @Article{Gabriel:2005:EDC, author = "Edgar Gabriel and Graham E. Fagg and Jack J. 
Dongarra", title = "Evaluating Dynamic Communicators and One-Sided Operations for Current {MPI} Libraries", journal = j-IJHPCA, volume = "19", number = "1", pages = "67--79", month = "Spring", year = "2005", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342005051197", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Tue Aug 31 09:59:45 MDT 2010", bibsource = "http://hpc.sagepub.com/content/19/1.toc; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://hpc.sagepub.com/content/19/1/67.full.pdf+html", acknowledgement = ack-nhfb, fjournal = "International Journal of High Performance Computing Applications", journal-URL = "http://hpc.sagepub.com/content/by/year", } @Article{Grove:2005:CBP, author = "D. A. Grove and P. D. Coddington", title = "Communication Benchmarking and Performance Modelling of {MPI} Programs on Cluster Computers", journal = j-J-SUPERCOMPUTING, volume = "34", number = "2", pages = "201--217", month = nov, year = "2005", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-005-2340-2", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Wed Jul 9 17:32:26 MDT 2008", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=34&issue=2; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=34&issue=2&spage=201", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", keywords = "cluster computing; parallel computing; performance modelling", } @Article{Hadjidoukas:2005:OEM, author = "P. E. Hadjidoukas and T. S. 
Papatheodorou", title = "{OpenMP} extensions for master-slave message passing computing", journal = j-PARALLEL-COMPUTING, volume = "31", number = "10--12", pages = "1155--1167", month = oct # "\slash " # dec, year = "2005", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Thu Sep 2 17:51:04 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Huang:2005:TME, author = "Lei Huang and Barbara Chapman and Zhenying Liu", title = "Towards a more efficient implementation of {OpenMP} for clusters via translation to global arrays", journal = j-PARALLEL-COMPUTING, volume = "31", number = "10--12", pages = "1114--1139", month = oct # "\slash " # dec, year = "2005", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Thu Sep 2 17:51:04 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Hurwitz:2005:AMP, author = "Justin (Gus) Hurwitz and Wu-chun Feng", title = "Analyzing {MPI} performance over 10-Gigabit {Ethernet}", journal = j-J-PAR-DIST-COMP, volume = "65", number = "10", pages = "1253--1260", month = oct, year = "2005", CODEN = "JPDCER", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Fri Jul 11 20:32:34 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sciencedirect.com/science/journal/07437315", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Article{Ierotheou:2005:GOC, author = "C. S. Ierotheou and H. Jin and G. Matthews and S. P. Johnson and R. 
Hood", title = "Generating {OpenMP} code using an interactive parallelization environment", journal = j-PARALLEL-COMPUTING, volume = "31", number = "10--12", pages = "999--1012", month = oct # "\slash " # dec, year = "2005", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Thu Sep 2 17:51:04 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Izaguirre:2005:PMS, author = "Jes{\'u}s A. Izaguirre and Scott S. Hampton and Thierry Matthey", title = "Parallel multigrid summation for the {$N$}-body problem", journal = j-J-PAR-DIST-COMP, volume = "65", number = "8", pages = "949--962", month = aug, year = "2005", CODEN = "JPDCER", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Fri Jul 11 20:32:33 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sciencedirect.com/science/journal/07437315", abstract = "An $ \Theta (n) $ parallel multigrid summation method (MG) for the N-body problem is presented. The method was originally devised for vacuum boundary conditions. Here, it is extended to periodic boundary conditions and implemented in parallel using force decomposition and MPI. MG is based on a hierarchical decomposition of computational kernels on multiple grids. For low accuracy calculations, appropriate for molecular dynamics, a sequential implementation is as fast or faster than particle mesh Ewald (PME). Our parallel implementation is more scalable than PME. The method can be combined with multiple time stepping integrators to produce a powerful simulation protocol for simulation of biological molecules and other materials. The parallel implementation is tested on both a Linux cluster with Myrinet interconnect and a shared memory computer. 
It is available as open-source at http://protomol.sourceforge.net. An auxiliary tool allows the automatic selection of optimal parameters for MG, and is available at http://mdsimaid.cse.nd.edu.", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Article{Jost:2005:WMP, author = "G. Jost and J. Labarta and J. Gimenez", editor = "????", booktitle = "Shared Memory Parallel Programming with {OpenMP}", title = "What Multilevel Parallel Programs do when you are not watching: a Performance analysis case study comparing {MPI\slash OpenMP}, {MLP}, and {Nested OpenMP}", journal = j-LECT-NOTES-COMP-SCI, volume = "3349", pages = "29--??", year = "2005", bibdate = "Mon Oct 07 09:04:25 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Jung:2005:DIM, author = "Hyungsoo Jung and Dongin Shin and Hyuck Han and Jai W. Kim and Heon Y. Yeom and Jongsuk Lee", title = "Design and Implementation of Multiple Fault-Tolerant {MPI} over {Myrinet} ({$ M^3 $})", crossref = "ACM:2005:PAI", pages = "32--32", year = "2005", bibdate = "Tue Dec 27 07:58:16 MST 2005", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Kamal:2005:SVT, author = "Humaira Kamal and Brad Penoff and Alan Wagner", title = "{SCTP} versus {TCP} for {MPI}", crossref = "ACM:2005:PAI", pages = "30--30", year = "2005", bibdate = "Tue Dec 27 07:58:16 MST 2005", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @InProceedings{Kappiah:2005:JTD, author = "Nandini Kappiah and Vincent W. Freeh and David K. 
Lowenthal", title = "Just In Time Dynamic Voltage Scaling: Exploiting Inter-Node Slack to Save Energy in {MPI} Programs", crossref = "ACM:2005:PAI", pages = "33--33", year = "2005", bibdate = "Tue Dec 27 07:58:16 MST 2005", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Article{Karwande:2005:MPC, author = "Amit Karwande and Xin Yuan and David K. Lowenthal", title = "An {MPI} prototype for compiled communication on {Ethernet} switched clusters", journal = j-J-PAR-DIST-COMP, volume = "65", number = "10", pages = "1123--1133", month = oct, year = "2005", CODEN = "JPDCER", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Fri Jul 11 20:32:34 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sciencedirect.com/science/journal/07437315", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Misc{Kepner:2005:PPM, author = "Jeremy Kepner", title = "Parallel Programming with {MatlabMPI}", howpublished = "World-Wide Web site.", year = "2005", bibdate = "Mon Dec 05 08:36:15 2005", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.ll.mit.edu/MatlabMPI/", abstract = "MatlabMPI is a set of Matlab scripts that implement a subset of MPI and allow any Matlab program to be run on a parallel computer. The key innovation of MatlabMPI is that it implements the widely used MPI ``look and feel'' on top of standard Matlab file i/o, resulting in a ``pure'' Matlab implementation that is exceedingly small (about 300 lines of code). Thus, MatlabMPI will run on any combination of computers that Matlab supports.
In addition, because of its small size, it is simple to download and use (and modify if you like).", acknowledgement = ack-nhfb, keywords = "Matlab; MatlabMPI; MPI; parallel processing", } @Article{Kranzlmuller:2005:RAP, author = "Dieter Kranzlm{\"u}ller and Peter Kacsuk and Jack Dongarra", title = "Recent Advances in {Parallel Virtual Machine} and {Message Passing Interface}", journal = j-IJHPCA, volume = "19", number = "2", pages = "99--101", month = "Summer", year = "2005", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342005054256", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Tue Aug 31 09:59:45 MDT 2010", bibsource = "http://hpc.sagepub.com/content/19/2.toc; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://hpc.sagepub.com/content/19/2/99.full.pdf+html", acknowledgement = ack-nhfb, journal-URL = "http://hpc.sagepub.com/content/by/year", } @Article{Liu:2005:EIO, author = "Z. Liu and L. Huang and B. Chapman and T. Weng", booktitle = "Shared Memory Parallel Programming with {OpenMP}", title = "Efficient Implementation of {OpenMP} for Clusters with Implicit Data Distribution", journal = j-LECT-NOTES-COMP-SCI, volume = "3349", pages = "121--??", year = "2005", bibdate = "Mon Oct 07 09:16:10 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Article{Marowka:2005:EMT, author = "Ami Marowka", title = "Execution model of three parallel languages: {OpenMP}, {UPC} and {CAF}", journal = j-SCI-PROG, volume = "13", number = "2", pages = "127--135", month = "????", year = "2005", CODEN = "SCIPEV", ISSN = "1058-9244 (print), 1875-919X (electronic)", ISSN-L = "1058-9244", bibdate = "Wed Sep 1 14:50:28 MDT 2010", bibsource = "http://www.iospress.nl/; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Scientific Programming", journal-URL = "http://iospress.metapress.com/content/1058-9244", } @Article{Martorell:2005:BGP, author = "X. 
Martorell and N. Smeds and R. Walkup and J. R. Brunheroto and G. Alm{\'a}si and J. A. Gunnels and L. DeRose and J. Labarta and F. Escal{\'e} and J. Gim{\'e}nez and H. Servat and J. E. Moreira", title = "{Blue Gene/L} performance tools", journal = j-IBM-JRD, volume = "49", number = "2/3", pages = "407--424", month = "????", year = "2005", CODEN = "IBMJAE", ISSN = "0018-8646 (print), 2151-8556 (electronic)", ISSN-L = "0018-8646", bibdate = "Wed Jun 1 08:14:41 MDT 2005", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.research.ibm.com/journal/", URL = "http://www.research.ibm.com/journal/rd/492/martorell.pdf", abstract = "Good performance monitoring is the basis of modern performance analysis tools for application optimization. We are providing a variety of such performance analysis tools for the new Blue Gene/L supercomputer. Those tools can be divided into two categories: single-node performance tools and multinode performance tools. From a single-node perspective, we provide standard interfaces and libraries, such as PAPI and libHPM, that provide access to the hardware performance counters for applications running on the Blue Gene/L compute nodes. From a multinode perspective, we focus on tools that analyze Message Passing Interface (MPI) behavior. Those tools work by first collecting message-passing trace data when a program runs. The trace data is then used by graphical interface tools that analyze the behavior of applications. 
Using the current prototype tools, we demonstrate their usefulness and applicability with case studies of application optimization.", acknowledgement = ack-nhfb, fjournal = "IBM Journal of Research and Development", journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5288520", ordernumber = "G322-0240", } @Article{Massaioli:2005:OPA, author = "Federico Massaioli and Filippo Castiglione and Massimo Bernaschi", title = "{OpenMP} parallelization of agent-based models", journal = j-PARALLEL-COMPUTING, volume = "31", number = "10--12", pages = "1066--1081", month = oct # "\slash " # dec, year = "2005", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Thu Sep 2 17:51:04 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Book{Mattson:2005:PPP, author = "Timothy G. Mattson and Beverly A. Sanders and Berna Massingill", title = "Patterns for Parallel Programming", publisher = pub-AW, address = pub-AW:adr, pages = "xiii + 355", year = "2005", ISBN = "0-321-22811-1 (hardcover)", ISBN-13 = "978-0-321-22811-6 (hardcover)", LCCN = "QA76.642 .M38 2005", bibdate = "Sat Oct 5 10:09:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2000.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; z3950.loc.gov:7090/Voyager", URL = "http://www.loc.gov/catdir/toc/ecip0418/2004013240.html", abstract = "The Parallel Programming Guide for Every Software Developer From grids and clusters to next-generation game consoles, parallel computing is going mainstream. Innovations such as Hyper-Threading Technology, HyperTransport Technology, and multicore microprocessors from IBM, Intel, and Sun are accelerating the movement's growth. Only one thing is missing: programmers with the skills to meet the soaring demand for parallel software. 
That's where Patterns for Parallel Programming comes in. It's the first parallel programming guide written specifically to serve working software developers, not just computer scientists. The authors introduce a complete, highly accessible pattern language that will help any experienced developer ``think parallel''---and start writing effective parallel code almost immediately. Instead of formal theory, they deliver proven solutions to the challenges faced by parallel programmers, and pragmatic guidance for using today's parallel APIs in the real world. Coverage includes: Understanding the parallel computing landscape and the challenges faced by parallel developers; Finding the concurrency in a software design problem and decomposing it into concurrent tasks; Managing the use of data across tasks; Creating an algorithm structure that effectively exploits the concurrency you've identified; Connecting your algorithmic structures to the APIs needed to implement them; Specific software constructs for implementing parallel programs; Working with today's leading parallel programming environments: OpenMP, MPI, and Java. Patterns have helped thousands of programmers master object-oriented development and other complex programming technologies. With this book, you will learn that they're the best way to master parallel programming too.", acknowledgement = ack-nhfb, author-dates = "1958--", subject = "Parallel programming (Computer science)", tableofcontents = "1: A pattern language for parallel programming \\ 2: Background and jargon of parallel computing \\ 3: The finding concurrency design space \\ 4: The algorithm structure design space \\ 5: The supporting structures design space \\ 6: The implementation mechanisms design space \\ Appendix A: A brief introduction to OpenMP \\ Appendix B: A brief introduction to MPI \\ Appendix C: A brief introduction to concurrent programming in Java", } @InProceedings{Mavriplis:2005:HRAa, author = "Dimitri J. Mavriplis and Michael J.
Aftosmis and Marsha Berger", title = "High Resolution Aerospace Applications using the {NASA Columbia Supercomputer}", crossref = "ACM:2005:PAI", pages = "61--61", year = "2005", bibdate = "Tue Dec 27 07:58:16 MST 2005", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "This paper focuses on the parallel performance of two high-performance aerodynamic simulation packages on the newly installed NASA Columbia supercomputer. These packages include both a high-fidelity, unstructured, Reynolds-averaged Navier--Stokes solver, and a fully-automated inviscid flow package for cut-cell Cartesian grids. The complementary combination of these two simulation codes enables high-fidelity characterization of aerospace vehicle design performance over the entire flight envelope through extensive parametric analysis and detailed simulation of critical regions of the flight envelope. Both packages are industrial-level codes designed for complex geometry and incorporate customized multigrid solution algorithms. The performance of these codes on Columbia is examined using both MPI and OpenMP and using both the NUMAlink and InfiniBand interconnect fabrics. Numerical results demonstrate good scalability on up to 2016 cpus using the NUMAlink4 interconnect, with measured computational rates in the vicinity of 3 TFLOP/s, while InfiniBand showed some performance degradation at high CPU counts, particularly with multigrid. Nonetheless, the results are encouraging enough to indicate that larger test cases using combined MPI/OpenMP communication should scale well on even more processors.", acknowledgement = ack-nhfb, remark = "Co-winner of best paper award. Also published in \cite{Mavriplis:2007:HRAb}.", } @Article{Medvedev:2005:OMA, author = "Dmitry M. Medvedev and Evelyn M. Goldfield and Stephen K. 
Gray", title = "An {OpenMP\slash MPI} approach to the parallelization of iterative four-atom quantum mechanics", journal = j-COMP-PHYS-COMM, volume = "166", number = "2", pages = "94--108", day = "1", month = mar, year = "2005", CODEN = "CPHCBZ", DOI = "https://doi.org/10.1016/j.cpc.2004.11.001", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Mon Feb 13 23:41:51 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2000.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465504005260", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Midorikawa:2005:PNM, author = "Edson Toshimi Midorikawa and Helio Marci Oliveira and Jean Marcos Laine", title = "{PEMPIs}: a New Methodology for Modeling and Prediction of {MPI} Programs Performance", journal = j-INT-J-PARALLEL-PROG, volume = "33", number = "5", pages = "499--527", month = oct, year = "2005", CODEN = "IJPPE5", DOI = "https://doi.org/10.1007/s10766-005-7303-y", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Wed Jul 9 16:05:39 MDT 2008", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=33&issue=5; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=33&issue=5&spage=499", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", keywords = "analytical modeling; graphical models; message passing; MPI; Performance prediction", } @Article{Nagle:2005:BRM, author = "Dan Nagle", title = "Book Review: {{\em MPI --- The Complete Reference, Vol. 
1, The MPI Core}, 2nd ed., Scientific and Engineering Computation Series, by Marc Snir, Steve Otto, Steven Huss-Lederman, David Walker and Jack Dongarra}", journal = j-SCI-PROG, volume = "13", number = "1", pages = "57--63", month = "????", year = "2005", CODEN = "SCIPEV", ISSN = "1058-9244 (print), 1875-919X (electronic)", ISSN-L = "1058-9244", bibdate = "Wed Sep 1 14:50:28 MDT 2010", bibsource = "http://www.iospress.nl/; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Scientific Programming", journal-URL = "http://iospress.metapress.com/content/1058-9244", } @Article{Nakajima:2005:PIS, author = "Kengo Nakajima", title = "Parallel iterative solvers for finite-element methods using an {OpenMP\slash MPI} hybrid programming model on the {Earth Simulator}", journal = j-PARALLEL-COMPUTING, volume = "31", number = "10--12", pages = "1048--1065", month = oct # "\slash " # dec, year = "2005", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Thu Sep 2 17:51:04 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Nakajima:2005:TLH, author = "Kengo Nakajima", title = "Three-level hybrid vs.
flat {MPI} on the {Earth Simulator}: Parallel iterative solvers for finite-element method", journal = j-APPL-NUM-MATH, volume = "54", number = "2", pages = "237--255", month = jul, year = "2005", CODEN = "ANMAEL", ISSN = "0168-9274 (print), 1873-5460 (electronic)", ISSN-L = "0168-9274", bibdate = "Tue Aug 24 11:17:20 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sciencedirect.com/science/journal/01689274", acknowledgement = ack-nhfb, fjournal = "Applied Numerical Mathematics: Transactions of IMACS", journal-URL = "http://www.sciencedirect.com/science/journal/01689274", } @Article{Norcen:2005:HPJ, author = "Roland Norcen and Andreas Uhl", title = "High performance {JPEG 2000} and {MPEG-4 VTC} on {SMPs} using {OpenMP}", journal = j-PARALLEL-COMPUTING, volume = "31", number = "10--12", pages = "1082--1098", month = oct # "\slash " # dec, year = "2005", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Thu Sep 2 17:51:04 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Orlando:2005:PSP, author = "Salvatore Orlando and Domenico Laforenza", title = "Preface: Selected Papers from the {EUROPVM\slash MPI 2003 Conference, Venice, Italy, 29 September--2 October 2003}", journal = j-IJHPCA, volume = "19", number = "1", pages = "47--47", month = "Spring", year = "2005", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342005051520", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Tue Aug 31 09:59:45 MDT 2010", bibsource = "http://hpc.sagepub.com/content/19/1.toc; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://hpc.sagepub.com/content/19/1/47.full.pdf+html", acknowledgement = ack-nhfb, journal-URL = "http://hpc.sagepub.com/content/by/year", } @Article{Park:2005:SOA, author = 
"Inho Park and Seon Wook Kim", title = "Study of {OpenMP} applications on the {InfiniBand}-based software distributed shared-memory system", journal = j-PARALLEL-COMPUTING, volume = "31", number = "10--12", pages = "1099--1113", month = oct # "\slash " # dec, year = "2005", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Thu Sep 2 17:51:04 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Book{Pharr:2005:GGP, editor = "Matt Pharr and Randima Fernando", title = "{GPU} gems 2: programming techniques for high-performance graphics and general-purpose computation", volume = "2", publisher = pub-AW, address = pub-AW:adr, pages = "xlix + 814", year = "2005", ISBN = "0-321-33559-7 (hardcover)", ISBN-13 = "978-0-321-33559-3 (hardcover)", LCCN = "T385 .G688 2005", bibdate = "Thu Jul 29 13:36:54 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/numana2000.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/unix.bib; z3950.loc.gov:7090/Voyager", series = "GPU gems", URL = "http://www-docs.tu-cottbus.de/bibliothek/public/katalog/420569.PDF; http://www.loc.gov/catdir/toc/ecip055/2004030181.html", abstract = "This sequel to the best-selling, first volume of GPU Gems details the latest programming techniques for today's graphics processing units (GPUs). As GPUs find their way into mobile phones, handheld gaming devices, and consoles, GPU expertise is even more critical in today's competitive environment. Real-time graphics programmers will discover the latest algorithms for creating advanced visual effects, strategies for managing complex scenes, and techniques for advanced image processing. 
Readers will also learn new methods for using the substantial processing power of the GPU in other computationally intensive applications, such as scientific computing and finance. Twenty of the book's forty-eight chapters are devoted to GPGPU programming, from basic concepts to advanced techniques. Written by experts in cutting-edge GPU programming, this book offers readers practical means to harness the enormous capabilities of GPUs.", acknowledgement = ack-nhfb, keywords = "CUDA; nVIDIA", remark = "CD-ROM contents: Complementary examples and samples.", } @InProceedings{Pjesivac-Grbovic:2005:PAM, author = "J. Pjesivac-Grbovic and T. Angskun and G. Bosilca and G. E. Fagg and E. Gabriel and J. J. Dongarra", title = "Performance Analysis of {MPI} Collective Operations", crossref = "IEEE:2005:IPD", pages = "272a--272a", year = "2005", bibdate = "Fri May 27 10:13:34 2005", bibsource = "https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Article{Rantakokko:2005:DMO, author = "Jarmo Rantakokko", title = "A Dynamic {MPI--OpenMP} Model for Structured Adaptive Mesh Refinement", journal = j-PARALLEL-PROCESS-LETT, volume = "15", number = "1/2", pages = "37--47", month = mar # "\slash " # jun, year = "2005", CODEN = "PPLTEE", DOI = "https://doi.org/10.1142/S0129626405002040", ISSN = "0129-6264 (print), 1793-642X (electronic)", bibdate = "Thu Sep 2 09:08:11 MDT 2010", bibsource = "http://ejournals.wspc.com.sg/ppl/; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Parallel Processing Letters", journal-URL = "http://www.worldscientific.com/loi/ppl", } @Article{Roberti:2005:PIL, author = "Debora R. Roberti and Roberto P. Souto and Haroldo F. Campos Velho and Gervasio A.
Degrazia and Domenico Anfossi", title = "Parallel Implementation of a {Lagrangian} Stochastic Model for Pollutant Dispersion", journal = j-INT-J-PARALLEL-PROG, volume = "33", number = "5", pages = "485--498", month = oct, year = "2005", CODEN = "IJPPE5", DOI = "https://doi.org/10.1007/s10766-005-7302-z", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Wed Jul 9 16:05:39 MDT 2008", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=33&issue=5; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=33&issue=5&spage=485", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", keywords = "High performance application; MPI; pollutant dispersion", } @Article{Rufai:2005:MPO, author = "Raimi Rufai and Muslim Bozyigit and Jaralla Alghamdi and Moataz Ahmed", title = "Multithreaded Parallelism with {OpenMP}", journal = j-PARALLEL-PROCESS-LETT, volume = "15", number = "4", pages = "367--378", month = dec, year = "2005", CODEN = "PPLTEE", DOI = "https://doi.org/10.1142/S0129626405002283", ISSN = "0129-6264 (print), 1793-642X (electronic)", bibdate = "Thu Sep 2 09:08:11 MDT 2010", bibsource = "http://ejournals.wspc.com.sg/ppl/; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Parallel Processing Letters", journal-URL = "http://www.worldscientific.com/loi/ppl", } @Article{Sankaran:2005:LMC, author = "Sriram Sankaran and Jeffrey M. 
Squyres and Brian Barrett and Vishal Sahay and Andrew Lumsdaine and Jason Duell and Paul Hargrove and Eric Roman", title = "The {LAM\slash MPI} Checkpoint\slash Restart Framework: System-Initiated Checkpointing", journal = j-IJHPCA, volume = "19", number = "4", pages = "479--493", month = "Winter", year = "2005", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342005056139", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Tue Aug 31 09:59:45 MDT 2010", bibsource = "http://hpc.sagepub.com/content/19/4.toc; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://hpc.sagepub.com/content/19/4/479.full.pdf+html", acknowledgement = ack-nhfb, journal-URL = "http://hpc.sagepub.com/content/by/year", } @Article{Santhanaraman:2005:DZC, author = "Gopalakrishnan Santhanaraman and Jiesheng Wu and Wei Huang and Dhabaleswar K. Panda", title = "Designing Zero-Copy {Message Passing Interface} Derived Datatype Communication Over {Infiniband}: Alternative Approaches and Performance Evaluation", journal = j-IJHPCA, volume = "19", number = "2", pages = "129--142", month = "Summer", year = "2005", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342005054259", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Tue Aug 31 09:59:45 MDT 2010", bibsource = "http://hpc.sagepub.com/content/19/2.toc; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://hpc.sagepub.com/content/19/2/129.full.pdf+html", acknowledgement = ack-nhfb, journal-URL = "http://hpc.sagepub.com/content/by/year", } @Article{Selikhov:2005:CMB, author = "A. Selikhov and C. 
Germain", title = "A {Channel Memory} based fault tolerance for {MPI} applications", journal = j-FUT-GEN-COMP-SYS, volume = "21", number = "5", pages = "709--715", month = may, year = "2005", CODEN = "FGSEVI", ISSN = "0167-739X (print), 1872-7115 (electronic)", ISSN-L = "0167-739X", bibdate = "Fri Jul 15 08:00:46 MDT 2005", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sciencedirect.com/science/journal/0167739X", acknowledgement = ack-nhfb, fjournal = "Future Generation Computer Systems", journal-URL = "http://www.sciencedirect.com/science/journal/0167739X", } @Book{Sloan:2005:HPL, author = "Joseph D. (Joseph Donald) Sloan", title = "High performance {Linux} clusters with {OSCAR}, {Rocks}, {openMosix}, and {MPI}", publisher = pub-ORA, address = pub-ORA:adr, pages = "xv + 350", year = "2005", ISBN = "0-596-00570-9", ISBN-13 = "978-0-596-00570-2", LCCN = "QA76.58; QA76.58 .S56 2005eb; QA76.58 .S56 2005; QA76.58 .S58 2005; QA76.58 .S595 2005", bibdate = "Tue Aug 5 17:41:39 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; melvyl.cdlib.org:210/CDL90", URL = "http://www.oreilly.com/catalog/9780596005702", acknowledgement = ack-nhfb, subject = "Linux; Parallel processing (Electronic computers); Electronic data processing; Distributed processing", } @Article{Thakur:2005:OCC, author = "Rajeev Thakur and Rolf Rabenseifner and William Gropp", title = "Optimization of Collective Communication Operations in {MPICH}", journal = j-IJHPCA, volume = "19", number = "1", pages = "49--66", month = "Spring", year = "2005", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342005051521", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Tue Aug 31 09:59:45 MDT 2010", bibsource = "http://hpc.sagepub.com/content/19/1.toc; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://hpc.sagepub.com/content/19/1/49.full.pdf+html", acknowledgement = ack-nhfb, journal-URL = 
"http://hpc.sagepub.com/content/by/year", } @Article{Thakur:2005:OSO, author = "Rajeev Thakur and William Gropp and Brian Toonen", title = "Optimizing the Synchronization Operations in {Message Passing Interface} One-Sided Communication", journal = j-IJHPCA, volume = "19", number = "2", pages = "119--128", month = "Summer", year = "2005", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342005054258", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Tue Aug 31 09:59:45 MDT 2010", bibsource = "http://hpc.sagepub.com/content/19/2.toc; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://hpc.sagepub.com/content/19/2/119.full.pdf+html", acknowledgement = ack-nhfb, journal-URL = "http://hpc.sagepub.com/content/by/year", } @Article{Tian:2005:CEN, author = "Xinmin Tian and Jay P. Hoeflinger and Grant Haab and Yen-Kuang Chen and Milind Girkar and Sanjiv Shah", title = "A compiler for exploiting nested parallelism in {OpenMP} programs", journal = j-PARALLEL-COMPUTING, volume = "31", number = "10--12", pages = "960--983", month = oct # "\slash " # dec, year = "2005", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Thu Sep 2 17:51:04 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Tian:2005:PCT, author = "Xinmin Tian and Milind Girkar and Aart Bik and Hideki Saito", title = "Practical Compiler Techniques on Efficient Multithreaded Code Generation for {OpenMP} Programs", journal = j-COMP-J, volume = "48", number = "5", pages = "588--601", month = sep, year = "2005", CODEN = "CMPJA6", DOI = "https://doi.org/10.1093/comjnl/bxh109", ISSN = "0010-4620 (print), 1460-2067 (electronic)", ISSN-L = "0010-4620", bibdate = "Tue Nov 8 05:58:50 MST 2005", bibsource = 
"http://comjnl.oxfordjournals.org/content/vol48/issue5/index.dtl; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://comjnl.oxfordjournals.org/cgi/content/abstract/48/5/588; http://comjnl.oxfordjournals.org/cgi/reprint/48/5/588", acknowledgement = ack-nhfb, fjournal = "The Computer Journal", journal-URL = "http://comjnl.oxfordjournals.org/", } @InProceedings{Wiese:2005:IPN, author = "Kay C. Wiese and Andrew Hendriks and Alain Deschenes and Belgacem {Ben Youssef}", title = "The Impact of Pseudorandom Number Quality on {P-RnaPredict}, a Parallel Genetic Algorithm for {RNA} Secondary Structure Prediction", crossref = "Beyer:2005:GEC", pages = "479--480", year = "2005", DOI = "https://doi.org/10.1145/1068009.1068089", bibdate = "Mon Mar 5 22:02:35 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/prng.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.cs.bham.ac.uk/~wbl/biblio/gecco2005lbp/papers/52-wiese.pdf", abstract = "We present a parallel version of RnaPredict, a genetic algorithm (GA) for RNA secondary structure prediction. The research presented here builds on previous work and examines the impact of three different pseudorandom number generators (PRNGs) on the GA's performance. The three generators tested are the C standard library PRNG RAND, a parallelised multiplicative congruential generator (MCG), and a parallelized Mersenne Twister (MT). A fully parallel version of RnaPredict using the Message Passing Interface (MPI) was implemented. The PRNG comparison tests were performed with known structures that are 118, 122, 543, and 556 nucleotides in length. 
The effects of the PRNGs are investigated and the predicted
                 structures are compared to known structures.",
  acknowledgement = ack-nhfb,
}

%%% Wiese:2005:IPN (the entry closed just above): the abstract ends with
%%% a full stop, as the other abstracts in this part of the file do.

@Article{Willcock:2005:UMC,
  author =       "Jeremiah Willcock and Andrew Lumsdaine and Arch Robison",
  title =        "Using {MPI} with {C\#} and the
                 {Common Language Infrastructure}",
  journal =      j-CCPE,
  volume =       "17",
  number =       "7--8",
  pages =        "895--917",
  month =        jun # "\slash " # jul,
  year =         "2005",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.861",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Sat May 14 11:30:57 MDT 2005",
  bibsource =    "http://www.interscience.wiley.com/jpages/1532-0626;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www3.interscience.wiley.com/journalfinder.html",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Prac\-tice and
                 Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
  onlinedate =   "23 Feb 2005",
}

@Article{Yu:2005:HPB,
  author =       "Weikuan Yu and Sayantan Sur and Dhabaleswar K. Panda
                 and Rob T. Aulwes and Rich L.
Graham", title = "High Performance Broadcast Support in {LA-MPI} Over Quadrics", journal = j-IJHPCA, volume = "19", number = "4", pages = "453--463", month = "Winter", year = "2005", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342005056145", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Tue Aug 31 09:59:45 MDT 2010", bibsource = "http://hpc.sagepub.com/content/19/4.toc; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://hpc.sagepub.com/content/19/4/453.full.pdf+html", acknowledgement = ack-nhfb, journal-URL = "http://hpc.sagepub.com/content/by/year", } @Article{Zhang:2005:ULC, author = "Youhui Zhang and Dongsheng Wong and Weimin Zheng", title = "User-level checkpoint and recovery for {LAM\slash MPI}", journal = j-OPER-SYS-REV, volume = "39", number = "3", pages = "72--81", month = jul, year = "2005", CODEN = "OSRED8", ISSN = "0163-5980 (print), 1943-586X (electronic)", ISSN-L = "0163-5980", bibdate = "Sat Aug 26 08:55:48 MDT 2006", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Operating Systems Review", } @Article{Zheng:2005:SBP, author = "Gengbin Zheng and Terry Wilmarth and Praveen Jagadishprasad and Laxmikant V. 
Kal{\'e}",
  title =        "Simulation-Based Performance Prediction for Large
                 Parallel Machines",
  journal =      j-INT-J-PARALLEL-PROG,
  volume =       "33",
  number =       "2--3",
  pages =        "183--207",
  month =        jun,
  year =         "2005",
  CODEN =        "IJPPE5",
  DOI =          "https://doi.org/10.1007/s10766-005-3582-6",
  ISSN =         "0885-7458 (print), 1573-7640 (electronic)",
  ISSN-L =       "0885-7458",
  bibdate =      "Wed Jul 9 16:05:27 MDT 2008",
  bibsource =    "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=33&issue=2;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=33&issue=2&spage=183",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of Parallel Programming",
  journal-URL =  "http://link.springer.com/journal/10766",
  keywords =     "adaptive MPI; CHARM++; computation modeling; large
                 parallel machines; Simulation-based performance
                 prediction",
}

%%% Zheng:2005:SBP (the entry closed just above): the keyword CHARM++
%%% replaces the garbled string CHARMH found in the harvested keyword
%%% list; presumably the plus signs were lost in extraction --- TODO
%%% confirm against the publisher's keyword list.

%%% Ayguade:2006:ENO: the first author's surname carries its acute
%%% accent, as in Ayguade:2007:SIO; the citation label stays unaccented.
@Article{Ayguade:2006:ENO,
  author =       "Eduard Ayguad{\'e} and Marc Gonzalez and Xavier
                 Martorell and Gabriele Jost",
  title =        "Employing nested {OpenMP} for the parallelization of
                 multi-zone computational fluid dynamics applications",
  journal =      j-J-PAR-DIST-COMP,
  volume =       "66",
  number =       "5",
  pages =        "686--697",
  month =        may,
  year =         "2006",
  CODEN =        "JPDCER",
  ISSN =         "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L =       "0743-7315",
  bibdate =      "Fri Jul 11 20:32:34 MDT 2008",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.sciencedirect.com/science/journal/07437315",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Parallel and Distributed Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/07437315",
}

%%% Barton:2006:SMP: the sixth author's surname is spelled Chatterjee.
@Article{Barton:2006:SMP,
  author =       "Christopher Barton and C{\u{a}}lin Cas{\c{c}}aval and
                 George Alm{\'a}si and Yili Zheng and Montse Farreras
                 and Siddhartha Chatterjee and Jos{\'e} Nelson Amaral",
  title =        "Shared memory programming for large scale machines",
  journal =      j-SIGPLAN,
  volume =       "41",
  number =       "6",
  pages =        "108--117",
  month =
jun, year = "2006", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1133981.1133995", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jun 18 10:42:48 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "This paper describes the design and implementation of a scalable run-time system and an optimizing compiler for Unified Parallel C (UPC). An experimental evaluation on BlueGene/L{\reg}, a distributed-memory machine, demonstrates that the combination of the compiler with the runtime system produces programs with performance comparable to that of efficient MPI programs and good performance scalability up to hundreds of thousands of processors. Our runtime system design solves the problem of maintaining shared object consistency efficiently in a distributed memory machine. Our compiler infrastructure simplifies the code generated for parallel loops in UPC through the elimination of affinity tests, eliminates several levels of indirection for accesses to segments of shared arrays that the compiler can prove to be local, and implements remote update operations through a lower-cost asynchronous message. The performance evaluation uses three well-known benchmarks --- HPC RandomAccess, HPC STREAM and NAS CG --- to obtain scaling and absolute performance numbers for these benchmarks on up to 131072 processors, the full BlueGene/L machine. 
These results were used to win the HPC Challenge Competition at SC05 in Seattle WA, demonstrating that PGAS languages support both productivity and performance.", acknowledgement = ack-nhfb, fjournal = "ACM SIG{\-}PLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", keywords = "BlueGene; PGAS programming model; UPC", } @Article{Battre:2006:MFP, author = "Dominic Battr{\'e} and David Sigfredo Angulo", title = "{MPI} framework for parallel searching in large biological databases", journal = j-J-PAR-DIST-COMP, volume = "66", number = "12", pages = "1503--1511", month = dec, year = "2006", CODEN = "JPDCER", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Fri Jul 11 20:32:35 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sciencedirect.com/science/journal/07437315", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Article{Becciani:2006:FMP, author = "U. Becciani and M. Comparato and V. 
Antonuccio-Delogu", title = "{FLY MPI-2}: a parallel tree code for {LSS}", journal = j-COMP-PHYS-COMM, volume = "174", number = "7", pages = "605--606", day = "1", month = apr, year = "2006", CODEN = "CPHCBZ", DOI = "https://doi.org/10.1016/j.cpc.2006.01.002", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Tue Feb 14 14:46:14 MST 2012", bibsource = "https://www.math.utah.edu/pub/bibnet/subjects/fastmultipole.bib; https://www.math.utah.edu/pub/tex/bib/compphyscomm2000.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465506000713", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Bouteiller:2006:HPS, author = "Aur{\'e}lien Bouteiller and Hinde-Lilia Bouziane and Thomas Herault and Pierre Lemarinier and Franck Cappello", title = "Hybrid Preemptive Scheduling of {Message Passing Interface} Applications on {Grids}", journal = j-IJHPCA, volume = "20", number = "1", pages = "77--90", month = "Spring", year = "2006", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342006062526", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Tue Aug 31 09:59:45 MDT 2010", bibsource = "http://hpc.sagepub.com/content/20/1.toc; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://hpc.sagepub.com/content/20/1/77.full.pdf+html", acknowledgement = ack-nhfb, journal-URL = "http://hpc.sagepub.com/content/by/year", } @Article{Bouteiller:2006:MVP, author = "A. Bouteiller and T. Herault and G. Krawezik and P. Lemarinier and F. 
Cappello",
  title =        "{MPICH-V} Project: a Multiprotocol Automatic
                 Fault-Tolerant {MPI}",
  journal =      j-IJHPCA,
  volume =       "20",
  number =       "3",
  pages =        "319--333",
  month =        "Fall",
  year =         "2006",
  CODEN =        "IHPCFL",
  DOI =          "https://doi.org/10.1177/1094342006067469",
  ISSN =         "1094-3420 (print), 1741-2846 (electronic)",
  ISSN-L =       "1094-3420",
  bibdate =      "Tue Aug 31 09:59:45 MDT 2010",
  bibsource =    "http://hpc.sagepub.com/content/20/3.toc;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://hpc.sagepub.com/content/20/3/319.full.pdf+html",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://hpc.sagepub.com/content/by/year",
}

%%% Costa:2006:ROA: the fourth author's surname carries its acute
%%% accent, as in Ayguade:2007:SIO.
@Article{Costa:2006:ROA,
  author =       "J. J. Costa and T. Cortes and X. Martorell and E.
                 Ayguad{\'e} and J. Labarta",
  title =        "Running {OpenMP} applications efficiently on an
                 everything-shared {SDSM}",
  journal =      j-J-PAR-DIST-COMP,
  volume =       "66",
  number =       "5",
  pages =        "647--658",
  month =        may,
  year =         "2006",
  CODEN =        "JPDCER",
  ISSN =         "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L =       "0743-7315",
  bibdate =      "Fri Jul 11 20:32:34 MDT 2008",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.sciencedirect.com/science/journal/07437315",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Parallel and Distributed Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/07437315",
}

@InProceedings{delCuvillo:2006:LOC,
  author =       "Juan del Cuvillo and Weirong Zhu and Guang Gao",
  title =        "Landing {OpenMP} on {Cyclops-64}: an efficient mapping
                 of {OpenMP} to a many-core system-on-a-chip",
  crossref =     "ACM:2006:PCC",
  pages =        "41--50",
  year =         "2006",
  DOI =          "https://doi.org/10.1145/1128022.1128030",
  bibdate =      "Tue Jun 20 06:42:45 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract =     "This paper presents our experience mapping OpenMP
                 parallel programming model to the IBM Cyclops-64 (C64)
                 architecture.
The C64 employs a many-core-on-a-chip design that integrates processing logic (160 thread units), embedded memory (5MB) and communication hardware on the same die. Such a unique architecture presents new opportunities for optimization. Specifically, we consider the following three areas: (1) a memory aware runtime library that places frequently used data structures in scratchpad memory; (2) a unique spin lock algorithm for shared memory synchronization based on in-memory atomic instructions and native support for thread level execution; (3) a fast barrier that directly uses C64 hardware support for collective synchronization. All three optimizations together, result in an 80\% overhead reduction for language constructs in OpenMP. We believe that such a drastic reduction in the cost of managing parallelism makes OpenMP more amenable for writing parallel programs on the C64 platform.", acknowledgement = ack-nhfb, } @Article{Deng:2006:PIK, author = "Junjun Deng and Hengyong Yu and Jun Ni and Tao He and Shiying Zhao and Lihe Wang and Ge Wang", title = "A Parallel Implementation of the Katsevich Algorithm for {$3$-D CT} Image Reconstruction", journal = j-J-SUPERCOMPUTING, volume = "38", number = "1", pages = "35--47", month = oct, year = "2006", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-006-6675-0", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Wed Jul 9 17:32:29 MDT 2008", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=38&issue=1; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=38&issue=1&spage=35", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", keywords = "Computed tomography (CT); high performance computing; image reconstruction; Katsevich algorithm; medical imaging; MPI; parallel computing; spiral cone-beam 
CT", } @Article{Donev:2006:ICF, author = "Aleksander Donev", title = "Interoperability with {C} in {Fortran 2003}", journal = j-FORTRAN-FORUM, volume = "25", number = "1", pages = "8--12", month = apr, year = "2006", DOI = "https://doi.org/10.1145/1124708.1124710", ISSN = "1061-7264 (print), 1931-1311 (electronic)", ISSN-L = "1061-7264", bibdate = "Wed Apr 12 07:18:43 MDT 2006", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "One of the major new features in the Fortran 2003 is features for interoperability with C Interop. The intrinsic module ISO\_C\_BINDING provides: * constants, mostly type parameters, C\_NULL\_CHAR, C\_NULL\_PTR, and others, * types, and in particular, TYPE(C\_PTR) and TYPE(C\_FUNPTR), * procedures, such as C\_LOC, C\_FUNLOC, C\_F\_POINTER, C\_F\_PROCPOINTER and C\_ASSOCIATED. A Fortran interface can be specified for a C function with external linkage and used to invoke such a function. The interface has the characteristic BIND(C) label, and must also satisfy some additional restrictions. C Interop can be used to portably use multi-language codes in Fortran. Since most languages interoperate with C, the feature can actually be used to interoperate with other programming languages as well. C Interop can also be used to give access to Fortran programmers to the many standard libraries with widely-used and implemented C interfaces. This includes lower-level tasks such as interfacing with the OS on UNIX-based systems, or using special libraries like OpenGL. For simple API's, developing Fortran interfaces is practically trivial once one gets some experience. For more complicated API's whose full functionality/power is not needed, such as for example TCP/IP sockets or shared-memory segments on UNIX systems, it is often easier to develop a condensed C API/library that does the actual work, and is simpler to interface to from Fortran. 
However, for libraries like OpenGL, one should provide a full
                 Fortran interface so that the whole API can be
                 accessed. Doing this manually is not easy and is also
                 error-prone due to the size of the OpenGL/GLU/GLUT
                 interfaces. For certain libraries like MPI, a special
                 Fortran interface may be defined for the purposes of
                 efficiency, portability, ease-of-use, or to
                 accommodate for language semantic differences. In this
                 first paper, we will show how to develop a Fortran
                 interface for a simple C API/library. In a second
                 paper, we consider automating the process so that
                 large and more complex API's, and in particular,
                 OpenGL, can be handled. The source codes can be
                 obtained at http://atom.princeton.edu/donev/F2x. Along
                 the way, we identify some problems with the design of
                 C Interop in Fortran 2003.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Fortran Forum",
  xxCODEN =      "????",
}

%%% Donev:2006:ICF (the entry closed just above): in the abstract, the
%%% sentence that ends with the source-code URL is separated by a space
%%% from the sentence that follows it.

@Article{Drosinos:2006:EPT,
  author =       "Nikolaos Drosinos and Nectarios Koziris",
  title =        "The Effect of Process Topology and Load Balancing on
                 Parallel Programming Models for {SMP} Clusters and
                 Iterative Algorithms",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "35",
  number =       "1",
  pages =        "65--91",
  month =        jan,
  year =         "2006",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-006-1156-z",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Wed Jul 9 17:32:27 MDT 2008",
  bibsource =    "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=35&issue=1;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=35&issue=1&spage=65",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
  keywords =     "high performance computing; hybrid programming;
                 iterative algorithms; MPI; OpenMP; parallel
                 programming; SMP clusters; tiling",
}

@Article{Huang:2006:ECS,
  author =       "Jih-Woei Huang and Chih-Ping Chu",
  title =        "An
Efficient Communication Scheduling Method for the Processor Mapping Technique Applied Data Redistribution", journal = j-J-SUPERCOMPUTING, volume = "37", number = "3", pages = "297--318", month = sep, year = "2006", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-006-6615-z", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Wed Jul 9 17:32:29 MDT 2008", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=37&issue=3; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=37&issue=3&spage=297", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", keywords = "communication scheduling; data redistribution; data-parallel programming; MPI; parallel compiler; processor mapping", } @Article{Krawezik:2006:PCM, author = "G{\'e}raud Krawezik and Franck Cappello", title = "Performance comparison of {MPI} and {OpenMP} on shared memory multiprocessors", journal = j-CCPE, volume = "18", number = "1", pages = "29--61", month = jan, year = "2006", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.905", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Mon Dec 5 10:08:00 MST 2011", bibsource = "http://www.interscience.wiley.com/jpages/1532-0626; https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Prac\-tice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "11 Oct 2005", } @Article{Lastovetsky:2006:HTM, author = "Alexey Lastovetsky and Ravi Reddy", title = "{HeteroMPI}: Towards a message-passing library for heterogeneous networks of computers", journal = j-J-PAR-DIST-COMP, volume = "66", number = "2", pages = "197--220", month = feb, year = "2006", CODEN = 
"JPDCER", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Fri Jul 11 20:32:34 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sciencedirect.com/science/journal/07437315", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Article{Le:2006:DMC, author = "Thuy T. Le and Jalel Rejeb", title = "A detailed {MPI} communication model for distributed systems", journal = j-FUT-GEN-COMP-SYS, volume = "22", number = "3", pages = "269--278", month = feb, year = "2006", CODEN = "FGSEVI", ISSN = "0167-739X (print), 1872-7115 (electronic)", ISSN-L = "0167-739X", bibdate = "Sat Sep 11 13:08:05 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sciencedirect.com/science/journal/0167739X", acknowledgement = ack-nhfb, fjournal = "Future Generation Computer Systems", journal-URL = "http://www.sciencedirect.com/science/journal/0167739X", } @Article{Lee:2006:PT, author = "Edward A. Lee", title = "The Problem with Threads", journal = j-COMPUTER, volume = "39", number = "5", pages = "33--42", month = may, year = "2006", CODEN = "CPTRB4", DOI = "https://doi.org/10.1109/MC.2006.180", ISSN = "0018-9162 (print), 1558-0814 (electronic)", ISSN-L = "0018-9162", bibdate = "Fri Jul 4 17:16:20 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "For concurrent programming to become mainstream, we must discard threads as a programming model. Nondeterminism should be judiciously and carefully introduced where needed, and it should be explicit in programs. In general-purpose software engineering practice, we have reached a point where one approach to concurrent programming dominates all others namely, threads, sequential processes that share memory. They represent a key concurrency model supported by modern computers, programming languages, and operating systems. 
In scientific computing, where performance requirements have long demanded concurrent programming, data-parallel language extensions and message-passing libraries such as PVM, MPI, and OpenMP dominate over threads for concurrent programming. Computer architectures intended for scientific computing often differ significantly from so-called general-purpose architectures.", acknowledgement = ack-nhfb, fjournal = "Computer", journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=2", } @Article{Liao:2006:SDI, author = "Wei-keng Liao and Kenin Coloma and Alok Choudhary and Lee Ward and Eric Russell and Neil Pundit", title = "Scalable Design and Implementations for {MPI} Parallel Overlapping {I/O}", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "17", number = "11", pages = "1264--1276", month = nov, year = "2006", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/TPDS.2006.163", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Thu Jul 3 14:26:50 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "IEEE Transactions on Parallel and Distributed Systems", journal-URL = "http://www.computer.org/tpds/archives.htm", } @Article{Lopez:2006:ESM, author = "F. C. Garc{\'\i}a L{\'o}pez and N. L. 
Fr{\'\i}as Arrocha", title = "An efficient synchronization model for {OpenMP}", journal = j-J-PAR-DIST-COMP, volume = "66", number = "11", pages = "1359--1365", month = nov, year = "2006", CODEN = "JPDCER", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Fri Jul 11 20:32:35 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sciencedirect.com/science/journal/07437315", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Article{Marowka:2006:BRP, author = "Ami Marowka", title = "Book Review: {{\em Parallel Scientific Computation: A Structured Approach using BSP and MPI}}", journal = j-SCPE, volume = "7", number = "2", pages = "107--108", month = jun, year = "2006", CODEN = "????", ISSN = "1895-1767", bibdate = "Thu Sep 2 11:55:11 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.scpe.org/content/7/2.toc", URL = "http://www.scpe.org/vols/vol07/no2/vol07no2bookreview.html", acknowledgement = ack-nhfb, } @Article{Mehta:2006:MSG, author = "Paras Mehta and Jos{\'e} Nelson Amaral and Duane Szafron", title = "Is {MPI} suitable for a generative design-pattern system?", journal = j-PARALLEL-COMPUTING, volume = "32", number = "7--8", pages = "616--626", month = sep, year = "2006", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Thu Sep 2 17:51:05 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Norden:2006:OVM, author = "Markus Nord{\'e}n and Sverker Holmgren and Michael Thun{\'e}", title = "{OpenMP} versus {MPI} for {PDE} solvers based on regular sparse numerical operators", journal = j-FUT-GEN-COMP-SYS, volume = "22", number = "1--2", pages = 
"194--203", month = jan, year = "2006", CODEN = "FGSEVI", ISSN = "0167-739X (print), 1872-7115 (electronic)", ISSN-L = "0167-739X", bibdate = "Sat Sep 11 13:08:05 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sciencedirect.com/science/journal/0167739X", acknowledgement = ack-nhfb, fjournal = "Future Generation Computer Systems", journal-URL = "http://www.sciencedirect.com/science/journal/0167739X", } @Article{ODowd:2006:WGM, author = "Padraig J. O'Dowd and Adarsh Patil and John P. Morrison", title = "{WebCom-G} and {MPICH-G2} Jobs", journal = j-SCPE, volume = "7", number = "3", pages = "75--86", month = sep, year = "2006", CODEN = "????", ISSN = "1895-1767", bibdate = "Thu Sep 2 11:55:11 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.scpe.org/content/7/3.toc", URL = "http://www.scpe.org/vols/vol07/no3/SCPE_7_3_07.pdf; http://www.scpe.org/vols/vol07/no3/SCPE_7_3_07.zip", acknowledgement = ack-nhfb, } @Article{Ohara:2006:MMP, author = "M. Ohara and H. Inoue and Y. Sohda and H. Komatsu and T. Nakatani", title = "{MPI} microtask for programming the {Cell Broadband Engine{\TM}} processor", journal = j-IBM-SYS-J, volume = "45", number = "1", pages = "85--102", month = "????", year = "2006", CODEN = "IBMSA7", ISSN = "0018-8670", bibdate = "Mon Feb 12 18:19:14 MST 2007", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.research.ibm.com/journal/", URL = "http://www.research.ibm.com/journal/sj/451/ohara.html", acknowledgement = ack-nhfb, fjournal = "IBM Systems Journal", ordernumber = "????", } @InProceedings{Paul:2006:TLF, author = "Jerome L. Paul and Michal Kouril and Kenneth A. 
Berman", title = "A template library to facilitate teaching message passing parallel computing", crossref = "ACM:2006:PST", pages = "464--468", year = "2006", DOI = "https://doi.org/10.1145/1121341.1121487", bibdate = "Tue Jun 20 06:51:37 2006", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "This paper discusses a template-based approach to aid in introducing the upper-division undergraduate (or first year graduate) to the rapidly emerging message passing parallel computing paradigm. Our template library facilitates an accelerated MPI programming learning environment that can realistically be included as one topic among many in an algorithms course. One template module is based on a backtracking solution to the satisfiability problem (SAT), which the student first solves in the sequential setting. With the aid of a modified template, the student then develops a simple parallel SAT solver. The template includes such things as I/O functions, allowing the student to focus on the algorithm itself. The parallel part is partially provided by the template, with indicators given in places where the student needs to plug in missing MPI function calls. 
The students are excited about this hands-on-experience in the
                 increasingly important world of message passing
                 parallel computing, which might be missed if their
                 curriculum does not include a course devoted to this
                 topic.",
  acknowledgement = ack-nhfb,
}

%%% Rozman:2006:CPL: the second author's surname begins with a capital
%%% S-caron, written as a BibTeX special character so that it is typeset
%%% capitalized.  ISSN-L repeats the print ISSN, following the pattern of
%%% the neighbouring entries that list a print and an electronic ISSN.
@Article{Rozman:2006:CPL,
  author =       "Igor Rozman and Marjan {\v{S}}terk and Roman Trobec",
  title =        "Communication Performance of {LAM\slash MPI} and
                 {MPICH} on a {Linux} Cluster",
  journal =      j-PARALLEL-PROCESS-LETT,
  volume =       "16",
  number =       "3",
  pages =        "323--334",
  month =        sep,
  year =         "2006",
  CODEN =        "PPLTEE",
  DOI =          "https://doi.org/10.1142/S0129626406002678",
  ISSN =         "0129-6264 (print), 1793-642X (electronic)",
  ISSN-L =       "0129-6264",
  bibdate =      "Thu Sep 2 09:08:11 MDT 2010",
  bibsource =    "http://ejournals.wspc.com.sg/ppl/;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Processing Letters",
  journal-URL =  "http://www.worldscientific.com/loi/ppl",
}

@Article{Su:2006:APP,
  author =       "Hai-Jun Su and J. Michael McCarthy and Masha Sosonkina
                 and Layne T. Watson",
  title =        "{Algorithm 857}: {POLSYS\_GLP}---a parallel general
                 linear product homotopy code for solving polynomial
                 systems of equations",
  journal =      j-TOMS,
  volume =       "32",
  number =       "4",
  pages =        "561--579",
  month =        dec,
  year =         "2006",
  CODEN =        "ACMSCU",
  DOI =          "https://doi.org/10.1145/1186785.1186789",
  ISSN =         "0098-3500 (print), 1557-7295 (electronic)",
  ISSN-L =       "0098-3500",
  bibdate =      "Sat Apr 14 09:48:57 MDT 2007",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract =     "Globally convergent, probability-one homotopy methods
                 have proven to be very effective for finding all the
                 isolated solutions to polynomial systems of equations.
                 After many years of development, homotopy path
                 trackers based on probability-one homotopy methods are
                 reliable and fast. Now, theoretical advances reducing
                 the number of homotopy paths that must be tracked and
                 handling singular solutions have made probability-one
                 homotopy methods even more practical.
POLSYS\_GLP consists of Fortran 95 modules for finding all isolated solutions of a complex coefficient polynomial system of equations. The package is intended to be used on a distributed memory multiprocessor in conjunction with HOMPACK90 (Algorithm 777), and makes extensive use of Fortran 95-derived data types and MPI to support a general linear product (GLP) polynomial system structure. GLP structure is intermediate between the partitioned linear product structure used by POLSYS\_PLP (Algorithm 801) and the BKK-based structure used by PHCPACK. The code requires a GLP structure as input, and although finding the optimal GLP structure is a difficult combinatorial problem, generally physical or engineering intuition about a problem yields a very good GLP structure. POLSYS\_GLP employs a sophisticated power series end game for handling singular solutions, and provides support for problem definition both at a high level and via hand-crafted code. Different GLP structures and their corresponding Bezout numbers can be systematically explored before committing to root finding.", acknowledgement = ack-nhfb, fjournal = "ACM Transactions on Mathematical Software", journal-URL = "http://dl.acm.org/pub.cfm?id=J782", } @Article{Weatherly:2006:DMS, author = "D. Brent Weatherly and David K. 
Lowenthal and Mario Nakazawa and Franklin Lowenthal", title = "{Dyn-MPI}: Supporting {MPI} on medium-scale, non-dedicated clusters", journal = j-J-PAR-DIST-COMP, volume = "66", number = "6", pages = "822--838", month = jun, year = "2006", CODEN = "JPDCER", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Fri Jul 11 22:04:47 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sciencedirect.com/science/journal/07437315", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Article{Zarrelli:2006:EPE, author = "Roberto Zarrelli and Mario Petrone and Angelo Iannaccio", title = "Enabling {PVM} to exploit the {SCTP} protocol", journal = j-J-PAR-DIST-COMP, volume = "66", number = "11", pages = "1472--1479", month = nov, year = "2006", CODEN = "JPDCER", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Fri Jul 11 20:32:35 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sciencedirect.com/science/journal/07437315", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Article{Zheng:2006:PEA, author = "Gengbin Zheng and Chao Huang and Laxmikant V. 
Kal{\'e}", title = "Performance evaluation of automatic checkpoint-based fault tolerance for {AMPI} and {Charm++}", journal = j-OPER-SYS-REV, volume = "40", number = "2", pages = "90--99", month = apr, year = "2006", CODEN = "OSRED8", ISSN = "0163-5980 (print), 1943-586X (electronic)", ISSN-L = "0163-5980", bibdate = "Sat Aug 26 08:55:43 MDT 2006", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Operating Systems Review", } @Article{Adhianto:2007:PMC, author = "Laksono Adhianto and Barbara Chapman", title = "Performance modeling of communication and computation in hybrid {MPI} and {OpenMP} applications", journal = j-SIM-MODEL-PRACT-THEORY, volume = "15", number = "4", pages = "481--491", month = apr, year = "2007", CODEN = "SMPTCA", DOI = "https://doi.org/10.1016/j.simpat.2006.11.014", ISSN = "1569-190X (print), 1878-1462 (electronic)", ISSN-L = "1569-190X", bibdate = "Mon Oct 07 09:21:03 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://www.sciencedirect.com/science/article/pii/S1569190X06001109", acknowledgement = ack-nhfb, fjournal = "Simulation Modelling Practice and Theory", } @Article{anMey:2007:NPO, author = "Dieter an Mey and Samuel Sarholz and Christian Terboven", title = "Nested Parallelization with {OpenMP}", journal = j-INT-J-PARALLEL-PROG, volume = "35", number = "5", pages = "459--476", month = oct, year = "2007", CODEN = "IJPPE5", DOI = "https://doi.org/10.1007/s10766-007-0054-1", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Wed Jul 9 16:06:48 MDT 2008", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=35&issue=5; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=35&issue=5&spage=459", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = 
"http://link.springer.com/journal/10766", keywords = "ccNUMA; Nested parallelization; OpenMP; Shared memory parallelization", } @Article{Ayguade:2007:SIO, author = "Eduard Ayguad{\'e} and Matthias S. Mueller", title = "Special Issue on {OpenMP} --- {Guest Editors}' Introduction", journal = j-INT-J-PARALLEL-PROG, volume = "35", number = "4", pages = "331--333", month = aug, year = "2007", CODEN = "IJPPE5", DOI = "https://doi.org/10.1007/s10766-007-0048-z", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Wed Jul 9 16:06:44 MDT 2008", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=35&issue=4; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=35&issue=4&spage=331", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", } @Article{Becciani:2007:FMH, author = "U. Becciani and V. Antonuccio-Delogu and M. Comparato", title = "{FLY}: {MPI}-2 high resolution code for {LSS} cosmological simulations", journal = j-COMP-PHYS-COMM, volume = "176", number = "3", pages = "211--217", day = "1", month = feb, year = "2007", CODEN = "CPHCBZ", DOI = "https://doi.org/10.1016/j.cpc.2006.10.001", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Mon Feb 13 23:42:13 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2000.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465506003687", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Bronevetsky:2007:CFS, author = "Greg Bronevetsky and Bronis R. 
de Supinski", title = "Complete Formal Specification of the {OpenMP} Memory Model", journal = j-INT-J-PARALLEL-PROG, volume = "35", number = "4", pages = "335--392", month = aug, year = "2007", CODEN = "IJPPE5", DOI = "https://doi.org/10.1007/s10766-007-0051-4", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Wed Jul 9 16:06:44 MDT 2008", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=35&issue=4; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=35&issue=4&spage=335", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", keywords = "formal systems; OpenMP; parallel programming; theorem proving", } @Article{Brown:2007:HSP, author = "Russell Brown and Ilya Sharapov", title = "High-Scalability Parallelization of a Molecular Modeling Application: Performance and Productivity Comparison Between {OpenMP} and {MPI} Implementations", journal = j-INT-J-PARALLEL-PROG, volume = "35", number = "5", pages = "441--458", month = oct, year = "2007", CODEN = "IJPPE5", DOI = "https://doi.org/10.1007/s10766-007-0057-y", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Wed Jul 9 16:06:48 MDT 2008", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=35&issue=5; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=35&issue=5&spage=441", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", keywords = "Molecular modeling; MPI; OpenMP; Parallel programming", } @Article{Buntinas:2007:IES, author = "Darius Buntinas and Guillaume Mercier and William Gropp", title = "Implementation and evaluation of 
shared-memory communication and synchronization operations in {MPICH2} using the {Nemesis} communication subsystem", journal = j-PARALLEL-COMPUTING, volume = "33", number = "9", pages = "634--644", month = sep, year = "2007", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Thu Sep 2 17:51:07 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Carbajal:2007:PTD, author = "Santiago Garcia Carbajal", title = "Parallelizing Three Dimensional Cellular Automata with {OpenMP}", journal = j-PARALLEL-PROCESS-LETT, volume = "17", number = "4", pages = "349--361", month = dec, year = "2007", CODEN = "PPLTEE", DOI = "https://doi.org/10.1142/S0129626407003083", ISSN = "0129-6264 (print), 1793-642X (electronic)", bibdate = "Thu Sep 2 09:08:11 MDT 2010", bibsource = "http://ejournals.wspc.com.sg/ppl/; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Parallel Processing Letters", journal-URL = "http://www.worldscientific.com/loi/ppl", } @Article{Chandra:2007:ESP, author = "Sumir Chandra and Xiaolin Li and Taher Saif and Manish Parashar", title = "Enabling scalable parallel implementations of structured adaptive mesh refinement applications", journal = j-J-SUPERCOMPUTING, volume = "39", number = "2", pages = "177--203", month = feb, year = "2007", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-007-0110-z", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Wed Jul 9 17:32:30 MDT 2008", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=39&issue=2; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=39&issue=2&spage=177", acknowledgement = ack-nhfb, fjournal = "The 
Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", keywords = "3-D Richtmyer-Meshkov application; Bin-packing based load-balancing; Hierarchical partitioning; MPI non-blocking communication optimization; SAMR scalability; Structured adaptive mesh refinement", } @Article{Chau:2007:MIP, author = "Ming Chau and Didier {El Baz} and Ronan Guivarch and Pierre Spiteri", title = "{MPI} implementation of parallel subdomain methods for linear and nonlinear convection--diffusion problems", journal = j-J-PAR-DIST-COMP, volume = "67", number = "5", pages = "581--591", month = may, year = "2007", CODEN = "JPDCER", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Fri Jul 11 20:32:35 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sciencedirect.com/science/journal/07437315", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Article{Desai:2007:CEM, author = "Narayan Desai and Ewing Lusk and Rick Bradshaw", title = "A Composition Environment for {MPI} Programs", journal = j-IJHPCA, volume = "21", number = "2", pages = "166--173", month = may, year = "2007", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342007077858", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Tue Aug 31 09:59:45 MDT 2010", bibsource = "http://hpc.sagepub.com/content/21/2.toc; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://hpc.sagepub.com/content/21/2/166.full.pdf+html", acknowledgement = ack-nhfb, journal-URL = "http://hpc.sagepub.com/content/by/year", } @Article{DiMartino:2007:SIS, author = "Beniamino {Di Martino} and Dieter Kranzlm{\"u}ller and Jack Dongarra", title = "Special issue on selected papers from the {EuroPVM\slash MPI 2005 Conference, Sorrento, Italy, 18--21 September 2005} --- Preface", journal = j-IJHPCA, volume = "21", number =
"2", pages = "129--131", month = may, year = "2007", DOI = "https://doi.org/10.1177/1094342006077863", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Tue Jun 4 08:20:03 MDT 2013", bibsource = "https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, journal-URL = "http://hpc.sagepub.com/content/by/year", } @Article{Duran:2007:PEH, author = "Alejandro Duran and Roger Ferrer and Juan Jos{\'e} Costa and Marc Gonz{\`a}lez and Xavier Martorell and Eduard Ayguad{\'e} and Jes{\'u}s Labarta", title = "A Proposal for Error Handling in {OpenMP}", journal = j-INT-J-PARALLEL-PROG, volume = "35", number = "4", pages = "393--416", month = aug, year = "2007", CODEN = "IJPPE5", DOI = "https://doi.org/10.1007/s10766-007-0049-y", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Wed Jul 9 16:06:44 MDT 2008", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=35&issue=4; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=35&issue=4&spage=393", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", keywords = "error handling; fault tolerance; OpenMP; parallel languages; parallel programming", } @Article{Falzone:2007:PMF, author = "Christopher Falzone and Anthony Chan and Ewing Lusk and William Gropp", title = "A Portable Method for Finding User Errors in the Usage of {MPI} Collective Operations", journal = j-IJHPCA, volume = "21", number = "2", pages = "155--165", month = may, year = "2007", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342007077860", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Tue Aug 31 09:59:45 MDT 2010", bibsource =
"http://hpc.sagepub.com/content/21/2.toc; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://hpc.sagepub.com/content/21/2/155.full.pdf+html", acknowledgement = ack-nhfb, journal-URL = "http://hpc.sagepub.com/content/by/year", } @Article{Genaud:2007:PMP, author = "St{\'e}phane Genaud and Choopan Rattanapoka", title = "{P2P--MPI}: a Peer-to-Peer Framework for Robust Execution of Message Passing Parallel Programs on {Grids}", journal = j-J-GRID-COMP, volume = "5", number = "1", pages = "27--42", month = mar, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1007/s10723-006-9056-2", ISSN = "1570-7873 (print), 1572-9184 (electronic)", ISSN-L = "1570-7873", bibdate = "Wed Jul 9 17:01:30 MDT 2008", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=1570-7873&volume=5&issue=1; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=1570-7873&volume=5&issue=1&spage=27", acknowledgement = ack-nhfb, fjournal = "Journal of Grid Computing", journal-URL = "http://link.springer.com/journal/10723", keywords = "Grid; Java; Key words; middleware; MPI; peer-to-peer", } @Article{Giannoutakis:2007:MHP, author = "K. M. Giannoutakis and G. A. Gravvanis and B. Clayton and A. Patil and T. Enright and J. P. 
Morrison", title = "Matching high performance approximate inverse preconditioning to architectural platforms", journal = j-J-SUPERCOMPUTING, volume = "42", number = "2", pages = "145--163", month = nov, year = "2007", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-007-0129-1", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Wed Jul 9 17:32:32 MDT 2008", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=42&issue=2; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=42&issue=2&spage=145", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", keywords = "Approximate inverses; Globus toolkit; MPI; Open MPI; Parallel iterative methods; Parallel/distributed computations; Preconditioned conjugate gradient method", } @Article{Graham:2007:OMH, author = "Richard L. Graham and Brian W. Barrett and Galen M. Shipman and Timothy S. 
Woodall and George Bosilca", title = "{Open MPI}: a High Performance, Flexible Implementation of {MPI} Point-to-Point Communications", journal = j-PARALLEL-PROCESS-LETT, volume = "17", number = "1", pages = "79--88", month = mar, year = "2007", CODEN = "PPLTEE", DOI = "https://doi.org/10.1142/S0129626407002880", ISSN = "0129-6264 (print), 1793-642X (electronic)", bibdate = "Thu Sep 2 09:08:11 MDT 2010", bibsource = "http://ejournals.wspc.com.sg/ppl/; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Parallel Processing Letters", journal-URL = "http://www.worldscientific.com/loi/ppl", } @Article{Gropp:2007:TSM, author = "William Gropp and Rajeev Thakur", title = "Thread-safety in an {MPI} implementation: Requirements and analysis", journal = j-PARALLEL-COMPUTING, volume = "33", number = "9", pages = "595--604", month = sep, year = "2007", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Thu Sep 2 17:51:07 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @InProceedings{Gu:2007:IPC, author = "Feng Long Gu and Hyacinthe Nzigou M. 
and Guilherme de Melo Baptista Domingues and Takeshi Nanri and Kazuaki Murakami", title = "Investigating the Performance of Collective Communications on {SMP} Clusters: a Case for {{\tt MPI\_Allgather}}", crossref = "Simos:2007:CMS", volume = "2A", pages = "52--56", year = "2007", bibdate = "Thu Feb 21 14:34:40 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://proceedings.aip.org/getpdf/servlet/GetPDFServlet?filetype=pdf&id=APCPCS000963000002000052000001&idtype=cvips", acknowledgement = ack-nhfb, } @Article{Klemm:2007:JIO, author = "Michael Klemm and Matthias Bezold and Ronald Veldema and Michael Philippsen", title = "{JaMP}: an implementation of {OpenMP} for a {Java DSM}", journal = j-CCPE, volume = "19", number = "18", pages = "2333--2352", day = "25", month = dec, year = "2007", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.1178", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Mon Dec 5 10:08:18 MST 2011", bibsource = "http://www.interscience.wiley.com/jpages/1532-0626; https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Prac\-tice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "3 Apr 2007", } @Article{Kurzyniec:2007:UCA, author = "Dawid Kurzyniec and Magdalena Slawi{\'n}ska and Jaroslaw Slawi{\'n}ski and Vaidy Sunderam", title = "{Unibus}: a contrarian approach to {Grid} computing", journal = j-J-SUPERCOMPUTING, volume = "42", number = "1", pages = "125--144", month = oct, year = "2007", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-006-0033-0", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Wed Jul 9 17:32:32 MDT 2008", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=42&issue=1; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL
= "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=42&issue=1&spage=125", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", keywords = "Aggregation; Grids; MPI; Resource sharing; Virtualization", } @Article{Latham:2007:IMI, author = "Robert Latham and Robert Ross and Rajeev Thakur", title = "Implementing {MPI-IO} Atomic Mode and Shared File Pointers Using {MPI} One-Sided Communication", journal = j-IJHPCA, volume = "21", number = "2", pages = "132--143", month = may, year = "2007", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342007077859", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Tue Aug 31 09:59:45 MDT 2010", bibsource = "http://hpc.sagepub.com/content/21/2.toc; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://hpc.sagepub.com/content/21/2/132.full.pdf+html", acknowledgement = ack-nhfb, journal-URL = "http://hpc.sagepub.com/content/by/year", } @Article{Li:2007:DIV, author = "Kuan-Ching Li and Hsun-Chang Chang", title = "The design and implementation of visual performance monitoring and analysis toolkit for cluster and {Grid} environments", journal = j-J-SUPERCOMPUTING, volume = "40", number = "3", pages = "299--317", month = jun, year = "2007", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-006-0020-5", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Wed Jul 9 17:32:31 MDT 2008", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=40&issue=3; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=40&issue=3&spage=299", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", keywords = "Distributed computing; Monitoring; MPI parallel program; Performance 
visualization", } @Article{Liao:2007:CCS, author = "Wei-keng Liao and Kenin Coloma and Alok Choudhary and Lee Ward", title = "Cooperative Client-Side File Caching for {MPI} Applications", journal = j-IJHPCA, volume = "21", number = "2", pages = "144--154", month = may, year = "2007", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342007077857", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Tue Aug 31 09:59:45 MDT 2010", bibsource = "http://hpc.sagepub.com/content/21/2.toc; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://hpc.sagepub.com/content/21/2/144.full.pdf+html", acknowledgement = ack-nhfb, journal-URL = "http://hpc.sagepub.com/content/by/year", } @Article{Liao:2007:OOP, author = "Chunhua Liao and Oscar Hernandez and Barbara Chapman and Wenguang Chen and Weimin Zheng", title = "{OpenUH}: an optimizing, portable {OpenMP} compiler", journal = j-CCPE, volume = "19", number = "18", pages = "2317--2332", day = "25", month = dec, year = "2007", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.1174", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Mon Dec 5 10:08:18 MST 2011", bibsource = "http://www.interscience.wiley.com/jpages/1532-0626; https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Prac\-tice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "3 Apr 2007", } @Article{Marathe:2007:SCC, author = "Jaydeep Marathe and Frank Mueller", title = "Source-Code-Correlated Cache Coherence Characterization of {OpenMP} Benchmarks", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "18", number = "6", pages = "818--834", month = jun, year = "2007", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/TPDS.2007.1058", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Thu Jul 3 14:26:52 MDT 
2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "IEEE Transactions on Parallel and Distributed Systems", journal-URL = "http://www.computer.org/tpds/archives.htm", } @Article{Marowka:2007:PCD, author = "Ami Marowka", title = "Parallel computing on any desktop", journal = j-CACM, volume = "50", number = "9", pages = "74--78", month = sep, year = "2007", CODEN = "CACMA2", DOI = "https://doi.org/10.1145/1284621.1284622", ISSN = "0001-0782 (print), 1557-7317 (electronic)", ISSN-L = "0001-0782", bibdate = "Mon Jun 16 18:32:57 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "Parallelization lets applications exploit the high throughput of new multicore processors, and the OpenMP parallel programming model helps developers create multithreaded applications.", acknowledgement = ack-nhfb, fjournal = "Communications of the ACM", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J79", } @Article{Mohr:2007:SPE, author = "Bernd Mohr and Jesper Larsson Tr{\"a}ff and Joachim Worringen", title = "Selected papers from {EuroPVM\slash MPI 2006}", journal = j-PARALLEL-COMPUTING, volume = "33", number = "9", pages = "593--594", month = sep, year = "2007", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Thu Sep 2 17:51:07 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Morris:2007:SNO, author = "Alan Morris and Allen D. Malony and Sameer S. 
Shende", title = "Supporting Nested {OpenMP} Parallelism in the {TAU} Performance System", journal = j-INT-J-PARALLEL-PROG, volume = "35", number = "4", pages = "417--436", month = aug, year = "2007", CODEN = "IJPPE5", DOI = "https://doi.org/10.1007/s10766-007-0050-5", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Wed Jul 9 16:06:44 MDT 2008", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=35&issue=4; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=35&issue=4&spage=417", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", keywords = "Nested parallelism; OpenMP; TAU", } @Article{Nascimento:2007:DDS, author = "Aline P. Nascimento and Alexandre C. Sena and Cristina Boeres and Vinod E. F. Rebello", title = "Distributed and dynamic self-scheduling of parallel {MPI Grid} applications", journal = j-CCPE, volume = "19", number = "14", pages = "1955--1974", day = "25", month = sep, year = "2007", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.1139", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Mon Dec 5 10:08:16 MST 2011", bibsource = "http://www.interscience.wiley.com/jpages/1532-0626; https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Prac\-tice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "14 Nov 2006", } @Article{Norden:2007:DDM, author = "Markus Nord{\'e}n and Henrik L{\"o}f and Jarmo Rantakokko and Sverker Holmgren", title = "Dynamic Data Migration for Structured {AMR} Solvers", journal = j-INT-J-PARALLEL-PROG, volume = "35", number = "5", pages = "477--491", month = oct, year = "2007", CODEN = "IJPPE5", 
DOI = "https://doi.org/10.1007/s10766-007-0056-z", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Wed Jul 9 16:06:48 MDT 2008", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=35&issue=5; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=35&issue=5&spage=477", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", keywords = "Adaptive mesh refinement; cc-NUMA; Geographical locality; Graph partitioning; OpenMP; Page migration; SAMR; Shared memory", } @Article{Pandey:2007:SCM, author = "Nirved Pandey and G. K. Sharma", title = "Startup comparison for message passing libraries with {DTM} on {Linux} clusters", journal = j-J-SUPERCOMPUTING, volume = "39", number = "1", pages = "59--72", month = jan, year = "2007", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-006-0004-5", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Wed Jul 9 17:32:30 MDT 2008", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=39&issue=1; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=39&issue=1&spage=59", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", keywords = "Chameleon MPICH; Distributed Performance Index (DPI); Distributed Task Machine (DTM); High Performance Cluster (HPC); Message Passing Interface (MPI); MPI-- Parallel Virtual Machine (PVM); Relative Distributed Performance Index (RDPI)", } @Article{Pedicini:2007:PPE, author = "Marco Pedicini and Francesco Quaglia", title = "{PELCR}: {Parallel} environment for optimal lambda-calculus reduction", journal = j-TOCL, volume = "8", number = "3", 
pages = "14:1--14:??", month = jul, year = "2007", CODEN = "????", DOI = "https://doi.org/10.1145/1243996.1243997", ISSN = "1529-3785 (print), 1557-945X (electronic)", ISSN-L = "1529-3785", bibdate = "Mon Jun 16 14:28:15 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tocl/; https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "In this article we present the implementation of an environment supporting L{\'e}vy's optimal reduction for the $ \lambda $-calculus on parallel (or distributed) computing systems. In a similar approach to Lamping's, we base our work on a graph reduction technique, known as directed virtual reduction, which is actually a restriction of Danos-Regnier virtual reduction.\par The environment, which we refer to as PELCR (parallel environment for optimal lambda-calculus reduction), relies on a strategy for directed virtual reduction, namely half combustion. While developing PELCR we adopted both a message aggregation technique, allowing reduction of the communication overhead, and a fair policy for distributing dynamically originated load among processors.\par We also present an experimental study demonstrating the ability of PELCR to definitely exploit the parallelism intrinsic to $ \lambda $-terms while performing the reduction. We show how PELCR allows achieving up to 70--80\% of the ideal speedup on last generation multiprocessor computing systems. 
As a last note, the software modules have been developed with the C language and using a standard interface for message passing, that is, MPI, thus making PELCR itself a highly portable software package.", acknowledgement = ack-nhfb, articleno = "14", fjournal = "ACM Transactions on Computational Logic", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J773", keywords = "functional programming; geometry of interaction; linear logic; optimal reduction; parallel implementation; virtual reduction", } @Article{Pjesivac-Grbovic:2007:MCA, author = "Jelena Pje{\v{s}}ivac-Grbovi{\'c} and George Bosilca and Graham E. Fagg and Thara Angskun and Jack J. Dongarra", title = "{MPI} collective algorithm selection and quadtree encoding", journal = j-PARALLEL-COMPUTING, volume = "33", number = "9", pages = "613--623", month = sep, year = "2007", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Thu Sep 2 17:51:07 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Pjesivac-Grbovic:2007:PAM, author = "Jelena Pjesivac-Grbovic and Thara Angskun and George Bosilca and Graham E. Fagg and Edgar Gabriel and Jack J. 
Dongarra", title = "Performance analysis of {MPI} collective operations", journal = "Cluster Computing", volume = "10", number = "2", pages = "127--143", month = jun, year = "2007", DOI = "https://doi.org/10.1007/s10586-007-0012-0", ISSN = "1386-7857", bibdate = "Tue Jun 4 08:20:03 MDT 2013", bibsource = "https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Cluster Computing: The Journal of Networks, Software Tools and Applications", conference-location = "Denver, CO", conference-date = "Apr 03--08, 2005", conference-name = "4th International Workshop on Performance Modelling, Evaluation, and Optimization of Parallel and Distributed Systems", } @Article{Ramadan:2007:TDM, author = "Omar Ramadan", title = "Three dimensional {MPI} parallel implementation of the {PML} algorithm for truncating finite-difference time-domain {Grids}", journal = j-PARALLEL-COMPUTING, volume = "33", number = "2", pages = "109--115", month = mar, year = "2007", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Thu Sep 2 17:51:06 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Rycerz:2007:IBS, author = "Katarzyna Rycerz and Alfredo Tirado-Ramos and Alessia Gualandris and Simon F. {Portegies Zwart} and Marian Bubak and Peter M. A.
Sloot", title = "Interactive {N}-Body Simulations on the {Grid}: {HLA} Versus {MPI}", journal = j-IJHPCA, volume = "21", number = "2", pages = "210--221", month = may, year = "2007", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342007074874", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Tue Aug 31 09:59:45 MDT 2010", bibsource = "http://hpc.sagepub.com/content/21/2.toc; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://hpc.sagepub.com/content/21/2/210.full.pdf+html", acknowledgement = ack-nhfb, journal-URL = "http://hpc.sagepub.com/content/by/year", } @Article{Tsujita:2007:RMP, author = "Y. Tsujita", title = "Remote {MPI-I/O} on a Parallel Virtual File System Using a Circular Buffer for High Throughput", journal = j-INT-J-COMPUT-APPL, volume = "29", number = "3", pages = "291--299", year = "2007", DOI = "https://doi.org/10.1080/1206212X.2007.11441859", ISSN = "1206-212X (print), 1925-7074 (electronic)", ISSN-L = "1206-212X", bibdate = "Sat Apr 21 17:24:05 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/ijca.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://www.tandfonline.com/doi/full/10.1080/1206212X.2007.11441859", acknowledgement = ack-nhfb, fjournal = "International Journal of Computers and Applications", journal-URL = "https://www.tandfonline.com/loi/tjca20", online-date = "11 Jul 2015", } @Article{Wang:2007:EAP, author = "Perry H. Wang and Jamison D. Collins and Gautham N. Chinya and Hong Jiang and Xinmin Tian and Milind Girkar and Nick Y. 
Yang and Guei-Yuan Lueh and Hong Wang", title = "{EXOCHI}: architecture and programming environment for a heterogeneous multi-core multithreaded system", journal = j-SIGPLAN, volume = "42", number = "6", pages = "156--166", month = jun, year = "2007", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1250734.1250753", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jun 18 10:55:30 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "Future mainstream microprocessors will likely integrate specialized accelerators, such as GPUs, onto a single die to achieve better performance and power efficiency. However, it remains a keen challenge to program such a heterogeneous multicore platform, since these specialized accelerators feature ISAs and functionality that are significantly different from the general purpose CPU cores. In this paper, we present EXOCHI: (1) Exoskeleton Sequencer (EXO), an architecture to represent heterogeneous accelerators as ISA-based MIMD architecture resources, and a shared virtual memory heterogeneous multithreaded program execution model that tightly couples specialized accelerator cores with general-purpose CPU cores, and (2) C for Heterogeneous Integration (CHI), an integrated C/C++ programming environment that supports accelerator-specific inline assembly and domain-specific languages. The CHI compiler extends the OpenMP pragma for heterogeneous multithreading programming, and produces a single fat binary with code sections corresponding to different instruction sets. The runtime can judiciously spread parallel computation across the heterogeneous cores to optimize performance and power.\par We have prototyped the EXO architecture on a physical heterogeneous platform consisting of an Intel{\reg} Core{\TM} 2 Duo processor and an 8-core 32-thread Intel{\reg} Graphics Media Accelerator X3000. 
In addition, we have implemented the CHI integrated programming environment with the Intel{\reg} C++ Compiler, runtime toolset, and debugger. On the EXO prototype system, we have enhanced a suite of production-quality media kernels for video and image processing to utilize the accelerator through the CHI programming interface, achieving significant speedup (1.41X to 10.97X) over execution on the IA32 CPU alone.", acknowledgement = ack-nhfb, fjournal = "ACM SIG{\-}PLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", keywords = "GPU; heterogeneous multi-cores; openMP", } @Article{Weng:2007:OIS, author = "Tien-Hsiung Weng and Ruey-Kuen Perng and Barbara Chapman", title = "{OpenMP} Implementation of {SPICE3} Circuit Simulator", journal = j-INT-J-PARALLEL-PROG, volume = "35", number = "5", pages = "493--505", month = oct, year = "2007", CODEN = "IJPPE5", DOI = "https://doi.org/10.1007/s10766-007-0053-2", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Wed Jul 9 16:06:48 MDT 2008", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=35&issue=5; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=35&issue=5&spage=493", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", keywords = "OpenMP SPICE circuit simulator; Shared-memory programming model", } @Article{Wu:2007:IFR, author = "C.-L. Wu and D.-C. Lou and S.-Y.
Chen", title = "Integer factorization for {RSA} cryptosystem under a {PVM} environment", journal = j-INT-J-COMPUT-SYST-SCI-ENG, volume = "22", number = "1--2", pages = "??--??", month = jan # "\slash " # mar, year = "2007", CODEN = "CSSEEI", ISSN = "0267-6192", bibdate = "Tue Dec 03 12:31:25 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/computsystscieng.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "International Journal of Computer Systems Science and Engineering", remark = "Special issue: Privacy Data Management", } @Article{Zhong:2007:PPS, author = "Wei Zhong and Gulsah Altun and Xinmin Tian and Robert Harrison and Phang C. Tai and Yi Pan", title = "Parallel protein secondary structure prediction schemes using {Pthread} and {OpenMP} over hyper-threading technology", journal = j-J-SUPERCOMPUTING, volume = "41", number = "1", pages = "1--16", month = jul, year = "2007", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-007-0100-1", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Wed Jul 9 17:32:31 MDT 2008", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=41&issue=1; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=41&issue=1&spage=1", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", keywords = "BLOSUM62 matrix; DBNN (Denoeux Belief Neural Network); Hydrophobicity matrix; Hyper-threading; MPI (Message Passing Interface); Neural networks; OpenMP; Parallel architecture; Protein secondary structure prediction; PSSM (Position Specific Scoring Matrix); Pthread; Speedup", } @Article{Akzhalova:2008:WPL, author = "Assel Zh. Akzhalova and Daniar Y. 
Aizhulov and Galymzhan Seralin and Gulnar Balakayeva", title = "{Web} portal for large-scale computations based on {Grid} and {MPI}", journal = j-SCPE, volume = "9", number = "2", pages = "135--142", month = jun, year = "2008", CODEN = "????", ISSN = "1895-1767", bibdate = "Thu Sep 2 11:55:11 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.scpe.org/content/9/2.toc", URL = "http://www.scpe.org/vols/vol09/no2/SCPE_9_2_06.pdf; http://www.scpe.org/vols/vol09/no2/SCPE_9_2_06.zip", acknowledgement = ack-nhfb, } @TechReport{Baboulin:2008:SID, author = "Marc Baboulin and Jack J. Dongarra and Stanimire Tomov", title = "Some Issues in Dense Linear Algebra for Multicore and Special Purpose Architectures", type = "LAPACK Working Note", number = "200", institution = inst-UTK-CS, address = inst-UTK-CS:adr, month = may, year = "2008", bibdate = "Fri Apr 24 12:25:43 2009", bibsource = "https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.netlib.org/lapack/lawnspdf/lawn200.pdf", abstract = "We address some key issues in designing dense linear algebra (DLA) algorithms that are common for both multi/many-cores and special purpose architectures (in particular GPUs). We present them in the context of an LU factorization algorithm, where randomization techniques are used as an alternative to pivoting. This approach yields an algorithm based entirely on a collection of small Level 3 BLAS type computational tasks, which has emerged as a common goal in designing DLA algorithms for new architectures. Other common trends, also considered here, are block asynchronous task execution and ``Block'' layouts for the data associated with the separate tasks. We present numerical results and other specific experiments with DLA algorithms on NVIDIA GPUs using CUDA. The GPU results are also of interest themselves as we show a performance of up to 160 Gflop/s on a single Quadro FX 5600 card.
Keywords: dense linear algebra, parallel algorithms, LU factorization, multicore processors, graphic process units.", acknowledgement = ack-nhfb, utknumber = "UT-CS-08-615", } @Article{Bernabeu:2008:MPA, author = "Miguel O. Bernabeu and Pedro Alonso and Antonio M. Vidal", title = "A multilevel parallel algorithm to solve symmetric {Toeplitz} linear systems", journal = j-J-SUPERCOMPUTING, volume = "44", number = "3", pages = "237--256", month = jun, year = "2008", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-007-0157-x", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Wed Jul 9 17:32:34 MDT 2008", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=44&issue=3; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=44&issue=3&spage=237", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", keywords = "Cauchy-like matrix; MPI; Multilevel parallel programming; OpenMP; Rank displacement; Toeplitz matrix", } @InProceedings{Bischof:2008:PRM, author = "Christian Bischof and Niels Guertler and Andreas Kowarz", title = "Parallel Reverse Mode Automatic Differentiation for {OpenMP} Programs with {ADOL-C}", crossref = "Bischof:2008:AAD", volume = "64", pages = "163--173", year = "2008", DOI = "https://doi.org/10.1007/978-3-540-68942-3_15", bibdate = "Sat Dec 22 08:33:39 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncse.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/content/pdf/10.1007/978-3-540-68942-3_15", acknowledgement = ack-nhfb, book-DOI = "https://doi.org/10.1007/978-3-540-68942-3", book-URL = "http://www.springerlink.com/content/978-3-540-68942-3", } @Article{Bondhugula:2008:PAP, author = "Uday Bondhugula and Albert Hartono and J. Ramanujam and P. 
Sadayappan", title = "A practical automatic polyhedral parallelizer and locality optimizer", journal = j-SIGPLAN, volume = "43", number = "6", pages = "101--113", month = jun, year = "2008", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1379022.1375595", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jun 18 11:04:53 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "We present the design and implementation of an automatic polyhedral source-to-source transformation framework that can optimize regular programs (sequences of possibly imperfectly nested loops) for parallelism and locality simultaneously. Through this work, we show the practicality of analytical model-driven automatic transformation in the polyhedral model -- far beyond what is possible by current production compilers. Unlike previous works, our approach is an end-to-end fully automatic one driven by an integer linear optimization framework that takes an explicit view of finding good ways of tiling for parallelism and locality using affine transformations. The framework has been implemented into a tool to automatically generate OpenMP parallel code from C program sections. Experimental results from the tool show very high speedups for local and parallel execution on multi-cores over state-of-the-art compiler frameworks from the research community as well as the best native production compilers. 
The system also enables the easy use of powerful empirical/iterative optimization for general arbitrarily nested loop sequences.", acknowledgement = ack-nhfb, fjournal = "ACM SIG{\-}PLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", keywords = "affine transformations; automatic parallelization; locality optimization; loop transformations; polyhedral model; tiling", } @Article{Buntinas:2008:BVN, author = "Darius Buntinas and Camille Coti and Thomas Herault and Pierre Lemarinier and Laurence Pilard and Ala Rezmerita and Eric Rodriguez and Franck Cappello", title = "Blocking vs. non-blocking coordinated checkpointing for large-scale fault tolerant {MPI} Protocols", journal = j-FUT-GEN-COMP-SYS, volume = "24", number = "1", pages = "73--84", month = jan, year = "2008", CODEN = "FGSEVI", ISSN = "0167-739X (print), 1872-7115 (electronic)", ISSN-L = "0167-739X", bibdate = "Sat Sep 11 13:08:10 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sciencedirect.com/science/journal/0167739X", acknowledgement = ack-nhfb, fjournal = "Future Generation Computer Systems", journal-URL = "http://www.sciencedirect.com/science/journal/0167739X", } @Book{Chapman:2008:UOP, author = "Barbara Chapman and Gabriele Jost and Ruud van der Pas", title = "Using {OpenMP}: portable shared memory parallel programming", publisher = pub-MIT, address = pub-MIT:adr, pages = "xxii + 353", year = "2008", ISBN = "0-262-03377-1 (hardcover), 0-262-53302-2 (paperback)", ISBN-13 = "978-0-262-03377-0 (hardcover), 978-0-262-53302-7 (paperback)", LCCN = "QA76.642 .C49 2008", bibdate = "Sat Oct 5 07:59:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; z3950.loc.gov:7090/Voyager", series = "Scientific and engineering computation", URL = "http://www.loc.gov/catdir/toc/ecip0721/2007026656.html", abstract = "This title presents a comprehensive overview of OpenMP, the standard application programming interface for shared memory parallel 
computing - a reference for students and professionals. OpenMP, a portable programming interface for shared memory parallel computers, was adopted as an informal standard in 1997 by computer scientists who wanted a unified model on which to base programs for shared memory systems. OpenMP is now used by many software developers; it offers significant advantages over both hand-threading and MPI. ``Using OpenMP'' offers a comprehensive introduction to parallel programming concepts and a detailed overview of OpenMP. ``Using OpenMP'' discusses hardware developments, describes where OpenMP is applicable, and compares OpenMP to other programming interfaces for shared and distributed memory parallel architectures. It introduces the individual features of OpenMP, provides many source code examples that demonstrate the use and functionality of the language constructs, and offers tips on writing an efficient OpenMP program. It describes how to use OpenMP in full-scale applications to achieve high performance on large-scale architectures, discussing several case studies in detail, and offers in-depth troubleshooting advice.", acknowledgement = ack-nhfb, author-dates = "1954--", subject = "Parallel programming (Computer science); Application program interfaces (Computer software)", tableofcontents = "1. Introduction \\ 2. Overview of OpenMP \\ 3. Writing a First OpenMP Program \\ 4. OpenMP Language Features \\ 5. How to Get Good Performance by Using OpenMP \\ 6. Using OpenMP in the Real World \\ 7. Troubleshooting \\ 8. Under the Hood: How OpenMP Really Works \\ 9. The Future of OpenMP", } @Article{Che:2008:PSG, author = "Shuai Che and Michael Boyer and Jiayuan Meng and David Tarjan and Jeremy W. 
Sheaffer and Kevin Skadron", title = "A performance study of general-purpose applications on graphics processors using {CUDA}", journal = j-J-PAR-DIST-COMP, volume = "68", number = "10", pages = "1370--1380", month = oct, year = "2008", CODEN = "JPDCER", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Wed Sep 1 16:27:23 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sciencedirect.com/science/journal/07437315", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Article{Dalcin:2008:MPP, author = "Lisandro Dalc{\'\i}n and Rodrigo Paz and Mario Storti and Jorge D'El{\'\i}a", title = "{MPI} for {Python}: Performance improvements and {MPI-2} extensions", journal = j-J-PAR-DIST-COMP, volume = "68", number = "5", pages = "655--662", month = may, year = "2008", CODEN = "JPDCER", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Fri Jul 11 20:32:36 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sciencedirect.com/science/journal/07437315", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Article{DiMartino:2008:SSG, author = "Beniamino {Di Martino} and Dieter Kranzlm{\"u}ller and Jack Dongarra", title = "Special section: {Grid} computing and the {Message Passing Interface}", journal = j-FUT-GEN-COMP-SYS, volume = "24", number = "2", pages = "119--120", month = feb, year = "2008", CODEN = "FGSEVI", ISSN = "0167-739X (print), 1872-7115 (electronic)", ISSN-L = "0167-739X", bibdate = "Sat Sep 11 13:08:11 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sciencedirect.com/science/journal/0167739X", acknowledgement = ack-nhfb, fjournal = 
"Future Generation Computer Systems", journal-URL = "http://www.sciencedirect.com/science/journal/0167739X", } @Article{Faraj:2008:SPA, author = "Ahmad Faraj and Pitch Patarasuk and Xin Yuan", title = "A Study of Process Arrival Patterns for {MPI} Collective Operations", journal = j-INT-J-PARALLEL-PROG, volume = "36", number = "6", pages = "543--570", month = dec, year = "2008", CODEN = "IJPPE5", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Wed Sep 1 16:06:46 MDT 2010", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=36&issue=6; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=36&issue=6&spage=543", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", } @Article{Freeh:2008:JTD, author = "Vincent W. Freeh and Nandini Kappiah and David K. Lowenthal and Tyler K. 
Bletsch", title = "Just-in-time dynamic voltage scaling: Exploiting inter-node slack to save energy in {MPI} programs", journal = j-J-PAR-DIST-COMP, volume = "68", number = "9", pages = "1175--1185", month = sep, year = "2008", CODEN = "JPDCER", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Wed Sep 1 16:27:22 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sciencedirect.com/science/journal/07437315", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Article{Fujimoto:2008:DMV, author = "Noriyuki Fujimoto", title = "Dense Matrix-Vector Multiplication on the {CUDA} Architecture", journal = j-PARALLEL-PROCESS-LETT, volume = "18", number = "4", pages = "511--530", month = dec, year = "2008", CODEN = "PPLTEE", DOI = "https://doi.org/10.1142/S0129626408003545", ISSN = "0129-6264 (print), 1793-642X (electronic)", bibdate = "Thu Sep 2 09:08:11 MDT 2010", bibsource = "http://ejournals.wspc.com.sg/ppl/; https://www.math.utah.edu/pub/tex/bib/parallelprocesslett.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Parallel Processing Letters", journal-URL = "http://www.worldscientific.com/loi/ppl", } @Article{Gao:2008:GEI, author = "Guang R. 
Gao and Mitsuhisa Sato and Eduard Ayguad{\'e}", title = "{Guest Editors} Introduction: Special Issue on {OpenMP}", journal = j-INT-J-PARALLEL-PROG, volume = "36", number = "3", pages = "287--288", month = jun, year = "2008", CODEN = "IJPPE5", DOI = "https://doi.org/10.1007/s10766-008-0076-3", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Wed Jul 9 16:07:10 MDT 2008", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=36&issue=3; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=36&issue=3&spage=287", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", } @Article{Garland:2008:PCE, author = "Michael Garland and Scott {Le Grand} and John Nickolls and Joshua Anderson and Jim Hardwick and Scott Morton and Everett Phillips and Yao Zhang and Vasily Volkov", title = "Parallel Computing Experiences with {CUDA}", journal = j-IEEE-MICRO, volume = "28", number = "4", pages = "13--27", month = jul # "\slash " # aug, year = "2008", CODEN = "IEMIDZ", DOI = "https://doi.org/10.1109/MM.2008.57", ISSN = "0272-1732 (print), 1937-4143 (electronic)", ISSN-L = "0272-1732", bibdate = "Tue Sep 9 15:18:16 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeemicro.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "IEEE Micro", journal-URL = "http://www.computer.org/csdl/mags/mi/index.html", } @Article{Genaud:2008:EPC, author = "St{\'e}phane Genaud and Pierre Gan{\c{c}}arski and Guillaume Latu and Alexandre Blansch{\'e} and Choopan Rattanapoka and Damien Vouriot", title = "Exploitation of a parallel clustering algorithm on commodity hardware with {P2P-MPI}", journal = j-J-SUPERCOMPUTING, volume = "43", number = "1", pages = "21--41", month = jan, year = "2008", CODEN = "JOSUED", DOI = 
"https://doi.org/10.1007/s11227-007-0136-2", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Wed Jul 9 17:32:33 MDT 2008", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=43&issue=1; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=43&issue=1&spage=21", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", keywords = "Clustering; Evolutionary algorithms; Grid; Java; Parallel algorithms", } @Article{Gregoretti:2008:MGE, author = "F. Gregoretti and G. Laccetti and A. Murli and G. Oliva and U. Scafuri", title = "{MGF}: a grid-enabled {MPI} library", journal = j-FUT-GEN-COMP-SYS, volume = "24", number = "2", pages = "158--165", month = feb, year = "2008", CODEN = "FGSEVI", ISSN = "0167-739X (print), 1872-7115 (electronic)", ISSN-L = "0167-739X", bibdate = "Sat Sep 11 13:08:11 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sciencedirect.com/science/journal/0167739X", acknowledgement = ack-nhfb, fjournal = "Future Generation Computer Systems", journal-URL = "http://www.sciencedirect.com/science/journal/0167739X", } @Article{Ha:2008:NBP, author = "Phuong Hoai Ha and Philippas Tsigas and Otto J. 
Anshus", title = "Non-blocking programming on multi-core graphics processors: (extended abstract)", journal = j-COMP-ARCH-NEWS, volume = "36", number = "5", pages = "19--28", month = dec, year = "2008", CODEN = "CANED2", DOI = "https://doi.org/10.1145/1556444.1556448", ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)", ISSN-L = "0163-5964", bibdate = "Fri Jun 26 11:50:56 MDT 2009", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigarch.bib", abstract = "This paper investigates the synchronization power of coalesced memory accesses, a family of memory access mechanisms introduced in recent large multicore architectures like the CUDA graphics processors. We first design three memory access models to capture the fundamental features of the new memory access mechanisms. Subsequently, we prove the exact synchronization power of these models in terms of their consensus numbers. These tight results show that the coalesced memory access mechanisms can facilitate strong synchronization between the threads of multicore processors, without the need of synchronization primitives other than reads and writes.\par Moreover, based on the intrinsic features of recent GPU architectures, we construct strong synchronization objects like wait-free and t-resilient read-modify-write objects for a general model of recent GPU architectures without strong hardware synchronization primitives like test-and-set and compare-and-swap. Accesses to the wait-free objects have time complexity $ O(N) $, where $N$ is the number of processes. 
Our result demonstrates that it is possible to construct wait-free synchronization mechanisms for GPUs without the need of strong synchronization primitives in hardware and that wait-free programming is possible for GPUs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGARCH Computer Architecture News", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J89", } @Article{Hou:2008:BBS, author = "Qiming Hou and Kun Zhou and Baining Guo", title = "{BSGP}: bulk-synchronous {GPU} programming", journal = j-TOG, volume = "27", number = "3", pages = "19:1--19:??", month = aug, year = "2008", CODEN = "ATGRDF", DOI = "https://doi.org/10.1145/1360612.1360618", ISSN = "0730-0301 (print), 1557-7368 (electronic)", ISSN-L = "0730-0301", bibdate = "Tue Aug 12 13:40:36 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tog/; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/tog.bib", abstract = "We present BSGP, a new programming language for general purpose computation on the GPU. A BSGP program looks much the same as a sequential C program. Programmers only need to supply a bare minimum of extra information to describe parallel processing on GPUs. As a result, BSGP programs are easy to read, write, and maintain. Moreover, the ease of programming does not come at the cost of performance. A well-designed BSGP compiler converts BSGP programs to kernels and combines them using optimally allocated temporary streams. In our benchmark, BSGP programs achieve similar or better performance than well-optimized CUDA programs, while the source code complexity and programming time are significantly reduced.
To test BSGP's code efficiency and ease of programming, we implemented a variety of GPU applications, including a highly sophisticated X3D parser that would be extremely difficult to develop with existing GPU programming languages.", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM Transactions on Graphics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J778", keywords = "bulk synchronous parallel programming; programable graphics hardware; stream processing; thread manipulation", } @InCollection{Howes:2008:U, author = "L. Howes and D. B. Thomas", title = "Efficient Random Number Generation and Application Using {CUDA}", crossref = "Nguyen:2008:GG", chapter = "37", pages = "805--830", year = "2008", bibdate = "Sat Feb 08 18:40:34 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/prng.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, keywords = "random-number generation", } @Article{Huang:2008:FPM, author = "Jih-Woei Huang and Chih-Ping Chu", title = "A flexible processor mapping technique toward data localization for block-cyclic data redistribution", journal = j-J-SUPERCOMPUTING, volume = "45", number = "2", pages = "151--172", month = aug, year = "2008", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-007-0166-9", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Wed Jul 9 17:32:35 MDT 2008", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=45&issue=2; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=45&issue=2&spage=151", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", keywords = "Data distribution; Data-parallel programming; Distributed memory multicomputers; HPF; MPI; Processor mapping", } @Article{Jeun:2008:OPB, author = "Woo-Chul Jeun and Yang-Suk Kee 
and Soonhoi Ha and Changdon Kee", title = "Overcoming performance bottlenecks in using {OpenMP} on {SMP} clusters", journal = j-PARALLEL-COMPUTING, volume = "34", number = "10", pages = "570--592", month = oct, year = "2008", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Thu Sep 2 17:51:09 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Jin:2008:PEM, author = "Haoqiang Jin and Barbara Chapman and Lei Huang and Dieter an Mey and Thomas Reichstein", title = "Performance Evaluation of a Multi-Zone Application in Different {OpenMP} Approaches", journal = j-INT-J-PARALLEL-PROG, volume = "36", number = "3", pages = "312--325", month = jun, year = "2008", CODEN = "IJPPE5", DOI = "https://doi.org/10.1007/s10766-008-0074-5", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Wed Jul 9 16:07:10 MDT 2008", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=36&issue=3; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=36&issue=3&spage=312", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", keywords = "Multi-level parallelism; OpenMP extensions; Performance evaluation", } @Article{Kwon:2008:RPP, author = "Seongnam Kwon and Yongjoo Kim and Woo-Chul Jeun and Soonhoi Ha and Yunheung Paek", title = "A retargetable parallel-programming framework for {MPSoC}", journal = j-TODAES, volume = "13", number = "3", pages = "39:1--39:??", month = jul, year = "2008", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1367045.1367048", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Tue Aug 5 
18:41:27 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "As more processing elements are integrated in a single chip, embedded software design becomes more challenging: It becomes a parallel programming for nontrivial heterogeneous multiprocessors with diverse communication architectures, and design constraints such as hardware cost, power, and timeliness. In the current practice of parallel programming with MPI or OpenMP, the programmer should manually optimize the parallel code for each target architecture and for the design constraints. Thus, the design-space exploration of MPSoC (multiprocessor systems-on-chip) costs become prohibitively large as software development overhead increases drastically. To solve this problem, we develop a parallel-programming framework based on a novel programming model called common intermediate code (CIC). In a CIC, functional parallelism and data parallelism of application tasks are specified independently of the target architecture and design constraints. Then, the CIC translator translates the CIC into the final parallel code, considering the target architecture and design constraints to make the CIC retargetable. 
Experiments with preliminary examples, including the H.263 decoder, show that the proposed parallel-programming framework increases the design productivity of MPSoC software significantly.", acknowledgement = ack-nhfb, articleno = "39", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "design-space exploration; embedded software; multiprocessor system on chip; parallel-programming; software generation", } @Article{Liu:2008:AMD, author = "Weiguo Liu and Bertil Schmidt and Gerrit Voss and Wolfgang M{\"u}ller-Wittig", title = "Accelerating molecular dynamics simulations using Graphics Processing Units with {CUDA}", journal = j-COMP-PHYS-COMM, volume = "179", number = "9", pages = "634--641", day = "1", month = nov, year = "2008", CODEN = "CPHCBZ", DOI = "https://doi.org/10.1016/j.cpc.2008.05.008", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Mon Feb 13 23:42:37 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2000.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465508002191", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Luckow:2008:MFT, author = "Andr{\'e} Luckow and Bettina Schnor", title = "{Migol}: a fault-tolerant service framework for {MPI} applications in the {Grid}", journal = j-FUT-GEN-COMP-SYS, volume = "24", number = "2", pages = "142--152", month = feb, year = "2008", CODEN = "FGSEVI", ISSN = "0167-739X (print), 1872-7115 (electronic)", ISSN-L = "0167-739X", bibdate = "Sat Sep 11 13:08:11 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sciencedirect.com/science/journal/0167739X", acknowledgement = ack-nhfb, fjournal = "Future Generation Computer Systems", journal-URL = 
"http://www.sciencedirect.com/science/journal/0167739X", } @Article{Milovanovic:2008:NEE, author = "Milos Milovanovi{\'c} and Roger Ferrer and Vladimir Gajinov and Osman S. Unsal and Adrian Cristal and Eduard Ayguad{\'e} and Mateo Valero", title = "{Nebelung}: Execution Environment for Transactional {OpenMP}", journal = j-INT-J-PARALLEL-PROG, volume = "36", number = "3", pages = "326--346", month = jun, year = "2008", CODEN = "IJPPE5", DOI = "https://doi.org/10.1007/s10766-008-0073-6", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Wed Jul 9 16:07:10 MDT 2008", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=36&issue=3; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=36&issue=3&spage=326", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", keywords = "Compiler; OpenMP; Runtime system; Software Transactional Memory", } @Article{Nickolls:2008:SPP, author = "John Nickolls and Ian Buck and Michael Garland and Kevin Skadron", title = "Scalable parallel programming with {CUDA}", journal = j-QUEUE, volume = "6", number = "2", pages = "40--53", month = mar, year = "2008", CODEN = "AQCUAE", DOI = "https://doi.org/10.1145/1365490.1365500", ISSN = "1542-7730 (print), 1542-7749 (electronic)", ISSN-L = "1542-7730", bibdate = "Fri Jun 20 11:18:38 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/queue.bib", abstract = "Is CUDA the parallel programming model that application developers have been waiting for?", acknowledgement = ack-nhfb, fjournal = "ACM Queue: Tomorrow's Computing Today", } @Article{Noble:2008:GMY, author = "Michael S. 
Noble", title = "Getting more from your multicore: exploiting {OpenMP} from an open-source numerical scripting language", journal = j-CCPE, volume = "20", number = "16", pages = "1877--1891", month = nov, year = "2008", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.1296", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Mon Dec 5 10:08:28 MST 2011", bibsource = "http://www.interscience.wiley.com/jpages/1532-0626; https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Prac\-tice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "4 Jun 2008", }

@Article{OBrien:2008:SOC,
  author =       "Kevin O'Brien and Kathryn O'Brien and Zehra Sura and
                 Tong Chen and Tao Zhang",
  title =        "Supporting {OpenMP} on {Cell}",
  journal =      j-INT-J-PARALLEL-PROG,
  volume =       "36",
  number =       "3",
  pages =        "289--311",
  month =        jun,
  year =         "2008",
  CODEN =        "IJPPE5",
  DOI =          "https://doi.org/10.1007/s10766-008-0072-7",
  ISSN =         "0885-7458 (print), 1573-7640 (electronic)",
  ISSN-L =       "0885-7458",
  bibdate =      "Wed Jul 9 16:07:10 MDT 2008",
  bibsource =    "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=36&issue=3;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=36&issue=3&spage=289",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of Parallel Programming",
  journal-URL =  "http://link.springer.com/journal/10766",
  keywords =     "Data transfer; Heterogeneous architecture; OpenMP;
                 Thread synchronization",
}

@Article{Patrick:2008:CEO, author = "Christina M.
Patrick and SeungWoo Son and Mahmut Kandemir", title = "Comparative evaluation of overlap strategies with study of {I/O} overlap in {MPI-IO}", journal = j-OPER-SYS-REV, volume = "42", number = "6", pages = "43--49", month = oct, year = "2008", CODEN = "OSRED8", DOI = "https://doi.org/10.1145/1453775.1453784", ISSN = "0163-5980 (print), 1943-586X (electronic)", ISSN-L = "0163-5980", bibdate = "Thu Oct 23 14:23:29 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "Many scientific applications use parallel I/O to meet the low latency and high bandwidth I/O requirement. Among many available parallel I/O operations, collective I/O is one of the most popular methods when the storage layouts and access patterns of data do not match. The implementation of collective I/O typically involves disk I/O operations followed by interprocessor communications. Also, in many I/O-intensive applications, parallel I/O operations are usually followed by parallel computations. This paper presents a comparative study of different overlap strategies in parallel applications. We have experimented with four different overlap strategies (1) Overlapping I/O and communication; (2) Overlapping I/O and computation; (3) Overlapping computation and communication; and (4) Overlapping I/O, communication, and computation. All experiments have been conducted on a Linux Cluster and the performance results obtained are very encouraging. On an average, we have enhanced the performance of a generic collective read call by 38\%, the MxM benchmark by 26\%, and the FFT benchmark by 34\%.", acknowledgement = ack-nhfb, fjournal = "Operating Systems Review", } @Article{Rodriguez:2008:FTS, author = "Gabriel Rodr{\'\i}guez and Xo{\'a}n C. Pardo and Mar{\'\i}a J. 
Mart{\'\i}n and Patricia Gonz{\'a}lez and Daniel D{\'\i}az", title = "A Fault Tolerance Solution for Sequential and {MPI} Applications on the {Grid}", journal = j-SCPE, volume = "9", number = "2", pages = "101--109", month = jun, year = "2008", CODEN = "????", ISSN = "1895-1767", bibdate = "Thu Sep 2 11:55:11 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.scpe.org/content/9/2.toc", URL = "http://www.scpe.org/vols/vol09/no2/SCPE_9_2_03.pdf; http://www.scpe.org/vols/vol09/no2/SCPE_9_2_03.zip", acknowledgement = ack-nhfb, } @Article{Rolfe:2008:PFO, author = "Timothy J. Rolfe", title = "Perverse and foolish oft {I} strayed", journal = j-SIGCSE, volume = "40", number = "2", pages = "52--55", month = jun, year = "2008", CODEN = "SIGSD3", DOI = "https://doi.org/10.1145/1383602.1383634", ISSN = "0097-8418 (print), 2331-3927 (electronic)", ISSN-L = "0097-8418", bibdate = "Sat Nov 17 15:44:13 MST 2012", bibsource = "DBLP; http://dblp.uni-trier.de/db/journals/sigcse/sigcse40.html#Rolfe08; http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigcse2000.bib", URL = "ftp://ftp.math.utah.edu/pub/mirrors/ftp.ira.uka.de/bibliography/Misc/DBLP/2008.bib", abstract = "This uses a massively wrong-headed algorithm for sorting to exemplify the use of the backtracking strategy and the branch-and-bound strategy. In addition, brief notes are included on parallel processing approaches: Java threads on multi-core computers and distributed processing through such message passing systems as PVM and MPI.", acknowledgement = ack-nhfb, fjournal = "SIGCSE Bulletin (ACM Special Interest Group on Computer Science Education)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J688", } @Article{Rolfe:2008:SMA, author = "Timothy J. 
Rolfe", title = "A specimen {MPI} application: {$N$}-Queens in parallel", journal = j-SIGCSE, volume = "40", number = "4", pages = "42--45", month = dec, year = "2008", CODEN = "SIGSD3", DOI = "https://doi.org/10.1145/1473195.1473217", ISSN = "0097-8418 (print), 2331-3927 (electronic)", ISSN-L = "0097-8418", bibdate = "Sat Nov 17 15:44:17 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigcse2000.bib", abstract = "The generalized problem of placing $n$ queens on an $n$-by-$n$ board provides an ``embarrassingly parallel'' problem for parallel solution. This paper expands on the discussion presented in the May 2005 issue of Dr. Dobb's Journal [1], specifically taking the parallel execution through Java threads and bringing it into an application in C taking advantage of MPI.", acknowledgement = ack-nhfb, fjournal = "SIGCSE Bulletin (ACM Special Interest Group on Computer Science Education)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J688", } @Article{Sala:2008:PHP, author = "Marzio Sala and W. F. Spotz and M. A. Heroux", title = "{PyTrilinos}: {High-performance} distributed-memory solvers for {Python}", journal = j-TOMS, volume = "34", number = "2", pages = "7:1--7:33", month = mar, year = "2008", CODEN = "ACMSCU", DOI = "https://doi.org/10.1145/1326548.1326549", ISSN = "0098-3500 (print), 1557-7295 (electronic)", ISSN-L = "0098-3500", bibdate = "Thu Jun 12 12:47:31 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "PyTrilinos is a collection of Python modules that are useful for serial and parallel scientific computing. This collection contains modules that cover serial and parallel dense linear algebra, serial and parallel sparse linear algebra, direct and iterative linear solution techniques, domain decomposition and multilevel preconditioners, nonlinear solvers, and continuation algorithms.
Also included are a variety of related utility functions and classes, including distributed I/O, coloring algorithms, and matrix generation. PyTrilinos vector objects are integrated with the popular NumPy Python module, gathering together a variety of high-level distributed computing operations with serial vector operations.\par PyTrilinos is a set of interfaces to existing, compiled libraries. This hybrid framework uses Python as front-end, and efficient precompiled libraries for all computationally expensive tasks. Thus, we take advantage of both the flexibility and ease of use of Python, and the efficiency of the underlying C++, C, and FORTRAN numerical kernels. Our numerical results show that, for many important problem classes, the overhead required by the Python interpreter is negligible.\par To run in parallel, PyTrilinos simply requires a standard Python interpreter. The fundamental MPI calls are encapsulated under an abstract layer that manages all interprocessor communications.
This makes serial and parallel scripts using PyTrilinos virtually identical.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Transactions on Mathematical Software", journal-URL = "http://dl.acm.org/pub.cfm?id=J782", keywords = "direct solvers; multilevel preconditioners; nonlinear solvers; object-oriented programming; script languages", } @Article{Schmitz:2008:IIG, author = "Arne Schmitz and Markus Tavenrath and Leif Kobbelt", title = "Illumination: Interactive Global Illumination for Deformable Geometry in {CUDA}", journal = j-CGF, volume = "27", number = "7", pages = "1979--1986", month = oct, year = "2008", CODEN = "CGFODY", DOI = "https://doi.org/10.1111/j.1467-8659.2008.01347.x", ISSN = "0167-7055 (print), 1467-8659 (electronic)", ISSN-L = "0167-7055", bibdate = "Sat May 11 13:27:05 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/cgf.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Com{\-}pu{\-}ter Graphics Forum", journal-URL = "http://onlinelibrary.wiley.com/journal/10.1111/(ISSN)1467-8659/", onlinedate = "23 Jan 2009", } @Article{Siegel:2008:CSE, author = "Stephen F. Siegel and Anastasia Mironova and George S. Avrunin and Lori A. Clarke", title = "Combining symbolic execution with model checking to verify parallel numerical programs", journal = j-TOSEM, volume = "17", number = "2", pages = "10:1--10:??", month = apr, year = "2008", CODEN = "ATSMER", DOI = "https://doi.org/10.1145/1348250.1348256", ISSN = "1049-331X (print), 1557-7392 (electronic)", ISSN-L = "1049-331X", bibdate = "Mon Jun 16 11:13:13 MDT 2008", bibsource = "http://www.acm.org/pubs/contents/journals/tosem/; https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "We present a method to verify the correctness of parallel programs that perform complex numerical computations, including computations involving floating-point arithmetic. 
This method requires that a sequential version of the program be provided, to serve as the specification for the parallel one. The key idea is to use model checking, together with symbolic execution, to establish the equivalence of the two programs. In this approach the path condition from symbolic execution of the sequential program is used to constrain the search through the parallel program. To handle floating-point operations, three different types of equivalence are supported. Several examples are presented, demonstrating the approach and actual errors that were found. Limitations and directions for future research are also described.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Transactions on Software Engineering and Methodology", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J790", keywords = "concurrency; Finite-state verification; floating-point; high performance computing; Message Passing Interface; model checking; MPI; numerical program; parallel programming; Spin; symbolic execution", }

@Article{Valencia:2008:PPR,
  author =       "David Valencia and Alexey Lastovetsky and Maureen
                 O'Flynn and Antonio Plaza and Javier Plaza",
  title =        "Parallel Processing of Remotely Sensed Hyperspectral
                 Images on Heterogeneous Networks of Workstations Using
                 {HeteroMPI}",
  journal =      j-IJHPCA,
  volume =       "22",
  number =       "4",
  pages =        "386--407",
  month =        nov,
  year =         "2008",
  CODEN =        "IHPCFL",
  DOI =          "https://doi.org/10.1177/1094342007088377",
  ISSN =         "1094-3420 (print), 1741-2846 (electronic)",
  ISSN-L =       "1094-3420",
  bibdate =      "Tue Aug 31 09:59:45 MDT 2010",
  bibsource =    "http://hpc.sagepub.com/content/22/4.toc;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://hpc.sagepub.com/content/22/4/386.full.pdf+html",
  acknowledgement = ack-nhfb,
  fjournal =     "The International Journal of High Performance
                 Computing Applications",
  journal-URL =  "http://hpc.sagepub.com/content/by/year",
}

@Article{VanZee:2008:SPF, author = "Field G. {Van Zee} and Paolo Bientinesi and Tze Meng Low and Robert A.
van de Geijn", title = "Scalable parallelization of {FLAME} code via the workqueuing model", journal = j-TOMS, volume = "34", number = "2", pages = "10:1--10:29", month = mar, year = "2008", CODEN = "ACMSCU", DOI = "https://doi.org/10.1145/1326548.1326552", ISSN = "0098-3500 (print), 1557-7295 (electronic)", ISSN-L = "0098-3500", bibdate = "Thu Jun 12 12:47:31 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "We discuss the OpenMP parallelization of linear algebra algorithms that are coded using the Formal Linear Algebra Methods Environment (FLAME) API. This API expresses algorithms at a higher level of abstraction, avoids the use of loop and array indices, and represents these algorithms as they are formally derived and presented. We report on two implementations of the workqueuing model, neither of which requires the use of explicit indices to specify parallelism. The first implementation uses the experimental taskq pragma, which may influence the adoption of a similar construct into OpenMP 3.0. The second workqueuing implementation is domain-specific to FLAME but allows us to illustrate the benefits of sorting tasks according to their computational cost prior to parallel execution. In addition, we discuss how scalable parallelization of dense linear algebra algorithms via OpenMP will require a two-dimensional partitioning of operands much like a 2D data distribution is needed on distributed memory architectures.
We illustrate the issues and solutions by discussing the parallelization of the symmetric rank-$k$ update and report impressive performance on an SGI system with 14 Itanium2 processors.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Transactions on Mathematical Software", journal-URL = "http://dl.acm.org/pub.cfm?id=J782", keywords = "FLAME; OpenMP; parallel; scalability; SMP; workqueuing", } @Article{Wang:2008:PIM, author = "Kun Wang and Yu Zhang and Huayong Wang and Xiaowei Shen", title = "Parallelization of {IBM Mambo} system simulator in functional modes", journal = j-OPER-SYS-REV, volume = "42", number = "1", pages = "71--76", month = jan, year = "2008", CODEN = "OSRED8", DOI = "https://doi.org/10.1145/1341312.1341325", ISSN = "0163-5980 (print), 1943-586X (electronic)", ISSN-L = "0163-5980", bibdate = "Fri Jun 20 17:19:29 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "Mambo [4] is IBM's full-system simulator which models PowerPC systems, and provides a complete set of simulation tools to help IBM and its partners in pre-hardware development and performance evaluation for future systems. Currently Mambo simulates target systems on a single host thread. When the number of cores increases in a target system, Mambo's simulation performance for each core goes down. As the so-called `multi-core era' approaches, both target and host systems will have more and more cores. It is very important for Mambo to efficiently simulate a multi-core target system on a multi-core host system. Parallelization is a natural method to speed up Mambo under this situation.\par Parallel Mambo (P-Mambo) is a multi-threaded implementation of Mambo. Mambo's simulation engine is implemented as a user-level thread-scheduler. We propose a multi-scheduler method to adapt Mambo's simulation engine to multi-threaded execution. 
Based on this method a core-based module partition is proposed to achieve both high inter-scheduler parallelism and low inter-scheduler dependency. Protection of shared resources is crucial to both correctness and performance of P-Mambo. Since there are two tiers of threads in P-Mambo, protecting shared resources by only OS-level locks possibly introduces deadlocks due to user-level context switch. We propose a new lock mechanism to handle this problem. Since Mambo is an on-going project with many modules currently under development, co-existence with new modules is also important to P-Mambo. We propose a global-lock-based method to guarantee compatibility of P-Mambo with future Mambo modules.\par We have implemented the first version of P-Mambo in functional modes. The performance of P-Mambo has been evaluated on the OpenMP implementation of NAS Parallel Benchmark (NPB) 3.2 [12]. Preliminary experimental results show that P-Mambo achieves an average speedup of 3.4 on a 4-core host machine.", acknowledgement = ack-nhfb, fjournal = "Operating Systems Review", keywords = "architectural simulation; dynamic binary translation; parallel simulation", }

@Article{Wegiel:2008:MCVa, author = "Michal Wegiel and Chandra Krintz", title = "The {Mapping Collector}: virtual memory support for generational, parallel, and concurrent compaction", journal = j-COMP-ARCH-NEWS, volume = "36", number = "1", pages = "91--102", month = mar, year = "2008", CODEN = "CANED2", DOI = "https://doi.org/10.1145/1353535.1346294", ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)", ISSN-L = "0163-5964", bibdate = "Tue Jun 17 11:51:35 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "Parallel and concurrent garbage collectors are increasingly employed by managed runtime environments (MREs) to maintain scalability, as multi-core architectures and multi-threaded applications become pervasive.
Moreover, state-of-the-art MREs commonly implement compaction to eliminate heap fragmentation and enable fast linear object allocation.\par Our empirical analysis of object demographics reveals that unreachable objects in the heap tend to form clusters large enough to be effectively managed at the granularity of virtual memory pages. Even though processes can manipulate the mapping of the virtual address space through the standard operating system (OS) interface on most platforms, extant parallel/concurrent compactors do not do so to exploit this clustering behavior and instead achieve compaction by performing, relatively expensive, object moving and pointer adjustment.\par We introduce the Mapping Collector (MC), which leverages virtual memory operations to reclaim and consolidate free space without moving objects and updating pointers. MC is a nearly-single-phase compactor that is simpler and more efficient than previously reported compactors that comprise two to four phases. Through effective MRE-OS coordination, MC maintains the simplicity of a non-moving collector while providing efficient parallel and concurrent compaction.\par We implement both stop-the-world and concurrent MC in a generational garbage collection framework within the open-source HotSpot Java Virtual Machine. 
Our experimental evaluation using a multiprocessor indicates that MC significantly increases throughput and scalability as well as reduces pause times, relative to state-of-the-art, parallel and concurrent compactors.", acknowledgement = ack-nhfb, fjournal = "ACM SIGARCH Computer Architecture News", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J89", keywords = "compaction; concurrent; parallel; virtual memory", } @Article{Wegiel:2008:MCVb, author = "Michal Wegiel and Chandra Krintz", title = "The {Mapping Collector}: virtual memory support for generational, parallel, and concurrent compaction", journal = j-OPER-SYS-REV, volume = "42", number = "2", pages = "91--102", month = mar, year = "2008", CODEN = "OSRED8", DOI = "https://doi.org/10.1145/1353535.1346294", ISSN = "0163-5980 (print), 1943-586X (electronic)", ISSN-L = "0163-5980", bibdate = "Fri Jun 20 17:20:12 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "Parallel and concurrent garbage collectors are increasingly employed by managed runtime environments (MREs) to maintain scalability, as multi-core architectures and multi-threaded applications become pervasive. Moreover, state-of-the-art MREs commonly implement compaction to eliminate heap fragmentation and enable fast linear object allocation.\par Our empirical analysis of object demographics reveals that unreachable objects in the heap tend to form clusters large enough to be effectively managed at the granularity of virtual memory pages. 
Even though processes can manipulate the mapping of the virtual address space through the standard operating system (OS) interface on most platforms, extant parallel/concurrent compactors do not do so to exploit this clustering behavior and instead achieve compaction by performing, relatively expensive, object moving and pointer adjustment.\par We introduce the Mapping Collector (MC), which leverages virtual memory operations to reclaim and consolidate free space without moving objects and updating pointers. MC is a nearly-single-phase compactor that is simpler and more efficient than previously reported compactors that comprise two to four phases. Through effective MRE-OS coordination, MC maintains the simplicity of a non-moving collector while providing efficient parallel and concurrent compaction.\par We implement both stop-the-world and concurrent MC in a generational garbage collection framework within the open-source HotSpot Java Virtual Machine. Our experimental evaluation using a multiprocessor indicates that MC significantly increases throughput and scalability as well as reduces pause times, relative to state-of-the-art, parallel and concurrent compactors.", acknowledgement = ack-nhfb, fjournal = "Operating Systems Review", keywords = "compaction; concurrent; parallel; virtual memory", }

@Article{Wegiel:2008:MCVc, author = "Michal Wegiel and Chandra Krintz", title = "The {Mapping Collector}: virtual memory support for generational, parallel, and concurrent compaction", journal = j-SIGPLAN, volume = "43", number = "3", pages = "91--102", month = mar, year = "2008", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1353535.1346294", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Jun 18 11:03:40 MDT 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "Parallel and concurrent garbage collectors are increasingly employed by managed runtime environments (MREs) to maintain
scalability, as multi-core architectures and multi-threaded applications become pervasive. Moreover, state-of-the-art MREs commonly implement compaction to eliminate heap fragmentation and enable fast linear object allocation.\par Our empirical analysis of object demographics reveals that unreachable objects in the heap tend to form clusters large enough to be effectively managed at the granularity of virtual memory pages. Even though processes can manipulate the mapping of the virtual address space through the standard operating system (OS) interface on most platforms, extant parallel/concurrent compactors do not do so to exploit this clustering behavior and instead achieve compaction by performing, relatively expensive, object moving and pointer adjustment.\par We introduce the Mapping Collector (MC), which leverages virtual memory operations to reclaim and consolidate free space without moving objects and updating pointers. MC is a nearly-single-phase compactor that is simpler and more efficient than previously reported compactors that comprise two to four phases. Through effective MRE-OS coordination, MC maintains the simplicity of a non-moving collector while providing efficient parallel and concurrent compaction.\par We implement both stop-the-world and concurrent MC in a generational garbage collection framework within the open-source HotSpot Java Virtual Machine. 
Our experimental evaluation using a multiprocessor indicates that MC significantly increases throughput and scalability as well as reduces pause times, relative to state-of-the-art, parallel and concurrent compactors.", acknowledgement = ack-nhfb, fjournal = "ACM SIG{\-}PLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", keywords = "compaction; concurrent; parallel; virtual memory", }

@Article{Yang:2008:DPL, author = "Chao-Tung Yang and Wen-Chung Shih and Shian-Shyong Tseng", title = "Dynamic partitioning of loop iterations on heterogeneous {PC} clusters", journal = j-J-SUPERCOMPUTING, volume = "44", number = "1", pages = "1--23", month = apr, year = "2008", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-007-0146-0", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Wed Jul 9 17:32:34 MDT 2008", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=44&issue=1; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=44&issue=1&spage=1", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", keywords = "Cluster computing; Heterogeneous; MPI programming; Parallel loops; PC clusters; Self-scheduling", }

@Article{Ayguade:2009:DOT, author = "Eduard Ayguad{\'e} and Nawal Copty and Alejandro Duran and Jay Hoeflinger and Yuan Lin and Federico Massaioli and Xavier Teruel and Priya Unnikrishnan and Guansong Zhang", title = "The Design of {OpenMP} Tasks", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "20", number = "3", pages = "404--418", month = mar, year = "2009", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/TPDS.2008.105", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Thu May 13 12:06:56 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb,
fjournal = "IEEE Transactions on Parallel and Distributed Systems", journal-URL = "http://www.computer.org/tpds/archives.htm", } @Article{Bikshandi:2009:EPI, author = "Ganesh Bikshandi and Jose G. Castanos and Sreedhar B. Kodali and V. Krishna Nandivada and Igor Peshansky and Vijay A. Saraswat and Sayantan Sur and Pradeep Varma and Tong Wen", title = "Efficient, portable implementation of asynchronous multi-place programs", journal = j-SIGPLAN, volume = "44", number = "4", pages = "271--282", month = apr, year = "2009", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1594835.1504215", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 9 08:40:49 MDT 2009", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "The X10 programming language is organized around the notion of places (an encapsulation of data and activities operating on the data), partitioned global address space (PGAS), and asynchronous computation and communication.\par This paper introduces an expressive subset of X10, Flat X10, designed to permit efficient execution across multiple single-threaded places with a simple runtime and without compromising on the productivity of X10. We present the design, implementation and evaluation of a compiler and runtime system for Flat X10. The Flat X10 compiler translates programs into C++ SPMD programs communicating using an active messaging infrastructure. It uses novel techniques to transform explicitly parallel programs into SPMD programs. The runtime system is based on IBM's LAPI (Low-level API) and is easily portable to other libraries such as GASNet and ARMCI.\par Our implementation realizes performance comparable to hand-written MPI programs for well-known HPC benchmarks such as Random Access, Stream, and FFT, on a Federation-based cluster of Power5 SMPs (with hundreds of processors) and the Blue Gene (with thousands of processors). 
Submissions based on the work presented in this paper were co-winners of the 2007 and 2008 HPC Challenge Type II Awards.", acknowledgement = ack-nhfb, fjournal = "ACM SIG{\-}PLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", keywords = "APGAS; asynchrony; compiler; FFT; HPC; HPC challenge; PGAS; random access; runtime; SPMD; stream; X10", } @Article{Bronevetsky:2009:CAC, author = "Greg Bronevetsky and John Gyllenhaal and Bronis R. de Supinski", title = "{CLOMP}: Accurately Characterizing {OpenMP} Application Overheads", journal = j-INT-J-PARALLEL-PROG, volume = "37", number = "3", pages = "250--265", month = jun, year = "2009", CODEN = "IJPPE5", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Wed Sep 1 16:06:47 MDT 2010", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=37&issue=3; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=37&issue=3&spage=250", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", } @Article{Cappello:2009:FSI, author = "Franck Cappello and Thomas Herault and Jack Dongarra", title = "Foreword: Special issue: selected papers from the {14th European PVM\slash MPI Users Group Meeting}", journal = j-PARALLEL-COMPUTING, volume = "35", number = "12", pages = "571", year = "2009", CODEN = "PACOEJ", DOI = "https://doi.org/10.1016/j.parco.2009.11.001", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", MRclass = "68-06 (68M10 68M12)", MRnumber = "MR2596831", bibdate = "Sat Sep 4 17:11:07 2010", bibsource = "https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "Held in Paris, September 30--October 3, 2007", acknowledgement = ack-nhfb, fjournal = "Parallel Computing. 
Systems \& Applications", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Duran:2009:PEO, author = "Alejandro Duran and Roger Ferrer and Eduard Ayguad{\'e} and Rosa M. Badia and Jesus Labarta", title = "A Proposal to Extend the {OpenMP} Tasking Model with Dependent Tasks", journal = j-INT-J-PARALLEL-PROG, volume = "37", number = "3", pages = "292--305", month = jun, year = "2009", CODEN = "IJPPE5", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Wed Sep 1 16:06:47 MDT 2010", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=37&issue=3; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=37&issue=3&spage=292", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", }

@Article{Dursun:2009:MPM,
  author =       "Hikmet Dursun and Kevin J. Barker and Darren J.
                 Kerbyson and Scott Pakin and Richard Seymour and Rajiv
                 K. Kalia and Aiichiro Nakano and Priya Vashishta",
  title =        "An {MPI} Performance Monitoring Interface for Cell
                 Based Compute Nodes",
  journal =      j-PARALLEL-PROCESS-LETT,
  volume =       "19",
  number =       "4",
  pages =        "535--552",
  month =        dec,
  year =         "2009",
  CODEN =        "PPLTEE",
  DOI =          "https://doi.org/10.1142/S0129626409000407",
  ISSN =         "0129-6264 (print), 1793-642X (electronic)",
  ISSN-L =       "0129-6264",
  bibdate =      "Thu Sep 2 09:08:12 MDT 2010",
  bibsource =    "http://ejournals.wspc.com.sg/ppl/;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Processing Letters",
  journal-URL =  "http://www.worldscientific.com/loi/ppl",
}

@Article{ElMaghraoui:2009:MIM, author = "K. {El Maghraoui} and Travis J. Desell and Boleslaw K. Szymanski and Carlos A.
Varela", title = "Malleable iterative {MPI} applications", journal = j-CCPE, volume = "21", number = "3", pages = "393--413", day = "10", month = mar, year = "2009", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.1362", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Mon Dec 5 10:08:30 MST 2011", bibsource = "http://www.interscience.wiley.com/jpages/1532-0626; https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Prac\-tice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "1 Sep 2008", } @Article{Furlinger:2009:CAE, author = "Karl F{\"u}rlinger and Shirley Moore", title = "Capturing and Analyzing the Execution Control Flow of {OpenMP} Applications", journal = j-INT-J-PARALLEL-PROG, volume = "37", number = "3", pages = "266--276", month = jun, year = "2009", CODEN = "IJPPE5", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Wed Sep 1 16:06:47 MDT 2010", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=37&issue=3; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=37&issue=3&spage=266", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", } @Article{Genaud:2009:FMP, author = "St{\'e}phane Genaud and Emmanuel Jeannot and Choopan Rattanapoka", title = "Fault-Management in {P2P-MPI}", journal = j-INT-J-PARALLEL-PROG, volume = "37", number = "5", pages = "433--461", month = oct, year = "2009", CODEN = "IJPPE5", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Wed Sep 1 16:06:48 MDT 2010", bibsource = 
"http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=37&issue=5; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=37&issue=5&spage=433", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", } @Article{Giannoutakis:2009:DIP, author = "Konstantinos M. Giannoutakis and George A. Gravvanis", title = "Design and implementation of parallel approximate inverse classes using {OpenMP}", journal = j-CCPE, volume = "21", number = "2", pages = "115--131", month = feb, year = "2009", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.1324", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Mon Dec 5 10:08:30 MST 2011", bibsource = "http://www.interscience.wiley.com/jpages/1532-0626; https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Prac\-tice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "6 Jun 2008", } @TechReport{Granat:2009:NPQ, author = "Robert Granat and Bo K{\aa}gstr{\"o}m and Daniel Kressner", title = "A novel parallel {QR} algorithm for hybrid distributed memory {HPC} systems", type = "LAPACK Working Note", number = "216", institution = "Department of Computing Science and HPC2N", address = "Ume{\aa} University, S-901 Ume{\aa}, Sweden", month = apr, year = "2009", bibdate = "Fri Apr 24 12:25:43 2009", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.netlib.org/lapack/lawnspdf/lawn216.pdf", abstract = "A novel variant of the parallel QR algorithm for solving dense nonsymmetric eigenvalue problems on hybrid distributed high performance computing (HPC) systems is presented. 
For this purpose, we introduce the concept of multi-window bulge chain chasing and parallelize aggressive early deflation. The multi-window approach ensures that most computations when chasing chains of bulges are performed in level 3 BLAS operations, while the aim of aggressive early deflation is to speed up the convergence of the QR algorithm. Mixed MPI-OpenMP coding techniques are utilized for porting the codes to distributed memory platforms with multithreaded nodes, such as multicore processors. Numerous numerical experiments confirm the superior performance of our parallel QR algorithm in comparison with the existing ScaLAPACK code, leading to an implementation that is one to two orders of magnitude faster for sufficiently large problems, including a number of examples from applications.", acknowledgement = ack-nhfb, keywords = "aggressive early deflation; bulge chasing; Eigenvalue problem; hybrid distributed memory systems; level 3 performance; multishift; nonsymmetric QR algorithm; parallel algorithms; parallel computations", utknumber = "UMINF-09.06", } @Article{Gravvanis:2009:OBP, author = "George A. Gravvanis", title = "{OpenMP} based parallel normalized direct methods for sparse finite element linear systems", journal = j-J-SUPERCOMPUTING, volume = "47", number = "1", pages = "44--52", month = jan, year = "2009", CODEN = "JOSUED", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Wed Aug 25 08:38:28 MDT 2010", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=47&issue=1; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=47&issue=1&spage=44", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Article{Hadjidoukas:2009:HPF, author = "P. E. Hadjidoukas and V. V. Dimakopoulos and M. Delakis and C.
Garcia", title = "A high-performance face detection system using {OpenMP}", journal = j-CCPE, volume = "21", number = "15", pages = "1819--1837", month = oct, year = "2009", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.1389", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Mon Dec 5 10:08:38 MST 2011", bibsource = "http://www.interscience.wiley.com/jpages/1532-0626; https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Prac\-tice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "26 Mar 2009", } @Article{He:2009:AVS, author = "Jian He and Layne T. Watson and Masha Sosonkina", title = "{Algorithm 897}: {VTDIRECT95}: {Serial} and parallel codes for the global optimization algorithm {DIRECT}", journal = j-TOMS, volume = "36", number = "3", pages = "17:1--17:24", month = jul, year = "2009", CODEN = "ACMSCU", DOI = "https://doi.org/10.1145/1527286.1527291", ISSN = "0098-3500 (print), 1557-7295 (electronic)", ISSN-L = "0098-3500", bibdate = "Tue Jul 21 14:09:07 MDT 2009", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/toms.bib", note = "See remark \cite{Sosonkina:2015:RAV}.", abstract = "VTDIRECT95 is a Fortran 95 implementation of D. R. Jones' deterministic global optimization algorithm called {\em DIRECT}, which is widely used in multidisciplinary engineering design, biological science, and physical science applications. The package includes both a serial code and a data-distributed massively parallel code for different problem scales and optimization (exploration vs. exploitation) goals. Dynamic data structures are used to organize local data, handle unpredictable memory requirements, reduce the memory usage, and share the data across multiple processors.
The parallel code employs a multilevel functional and data parallelism to boost concurrency and mitigate the data dependency, thus improving the load balancing and scalability. In addition, checkpointing features are integrated into both versions to provide fault tolerance and hot restarts. Important algorithm modifications and design considerations are discussed regarding data structures, parallel schemes, error handling, and portability. Using several benchmark functions and real-world applications, the software is evaluated on different systems in terms of optimization effectiveness, data structure efficiency, parallel performance, and checkpointing overhead. The package organization and usage are also described in detail.", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Transactions on Mathematical Software (TOMS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J782", keywords = "checkpointing; data structures; DIRECT; global optimization; parallel schemes", } @Article{Hilbrich:2009:MCC, author = "Tobias Hilbrich and Matthias S. 
M{\"u}ller and Bettina Krammer", title = "{MPI} Correctness Checking for {OpenMP\slash MPI} Applications", journal = j-INT-J-PARALLEL-PROG, volume = "37", number = "3", pages = "277--291", month = jun, year = "2009", CODEN = "IJPPE5", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Wed Sep 1 16:06:47 MDT 2010", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=37&issue=3; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=37&issue=3&spage=277", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", } @Article{Hong:2009:AMG, author = "Sunpyo Hong and Hyesoon Kim", title = "An analytical model for a {GPU} architecture with memory-level and thread-level parallelism awareness", journal = j-COMP-ARCH-NEWS, volume = "37", number = "3", pages = "152--163", month = jun, year = "2009", CODEN = "CANED2", DOI = "https://doi.org/10.1145/1555815.1555775", ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)", ISSN-L = "0163-5964", bibdate = "Tue Aug 11 18:12:55 MDT 2009", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigarch.bib", abstract = "GPU architectures are increasingly important in the multi-core era due to their high number of parallel processors. Programming thousands of massively parallel threads is a big challenge for software engineers, but understanding the performance bottlenecks of those parallel programs on GPU architectures to improve application performance is even more difficult. 
Current approaches rely on programmers to tune their applications by exploiting the design space exhaustively without fully understanding the performance characteristics of their applications.\par To provide insights into the performance bottlenecks of parallel applications on GPU architectures, we propose a simple analytical model that estimates the execution time of massively parallel programs. The key component of our model is estimating the number of parallel memory requests (we call this the memory warp parallelism) by considering the number of running threads and memory bandwidth. Based on the degree of memory warp parallelism, the model estimates the cost of memory requests, thereby estimating the overall execution time of a program. Comparisons between the outcome of the model and the actual execution time in several GPUs show that the geometric mean of absolute error of our model on micro-benchmarks is 5.4\% and on GPU computing applications is 13.3\%. All the applications are written in the CUDA programming language.", acknowledgement = ack-nhfb, fjournal = "ACM SIGARCH Computer Architecture News", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J89", keywords = "analytical model; CUDA; GPU architecture; memory level parallelism; performance estimation; warp level parallelism", } @Article{Huang:2009:EGO, author = "Lei Huang and Deepak Eachempati and Marcus W. 
Hervey and Barbara Chapman", title = "Exploiting global optimizations for {OpenMP} programs in the {OpenUH} compiler", journal = j-SIGPLAN, volume = "44", number = "4", pages = "289--290", month = apr, year = "2009", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1504176.1504219", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 9 08:40:49 MDT 2009", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "The advent of new parallel architectures has increased the need for parallel optimizing compilers to assist developers in creating efficient code. OpenUH is a state-of-the-art optimizing compiler, but it only performs a limited set of optimizations for OpenMP programs due to its conservative assumptions of shared memory programming. These limitations may prevent some OpenMP applications from being fully optimized to the extent of its sequential counterpart. This paper describes our design and implementation of a parallel data flow framework, consisting of a Parallel Control Flow Graph (PCFG) and a Parallel SSA (PSSA) representation in OpenUH, to model data flow for OpenMP programs. 
This framework enables the OpenUH compiler to perform all classical scalar optimizations for OpenMP programs, in addition to conducting OpenMP specific optimizations.", acknowledgement = ack-nhfb, fjournal = "ACM SIG{\-}PLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", keywords = "compiler analysis; OpenMP; Parallel SSA", } @Article{Kainz:2009:RCM, author = "Bernhard Kainz and Markus Grabner and Alexander Bornik and Stefan Hauswiesner and Judith Muehl and Dieter Schmalstieg", title = "Ray casting of multiple volumetric datasets with polyhedral boundaries on manycore {GPUs}", journal = j-TOG, volume = "28", number = "5", pages = "152:1--152:9", month = dec, year = "2009", CODEN = "ATGRDF", DOI = "https://doi.org/10.1145/1618452.1618498", ISSN = "0730-0301 (print), 1557-7368 (electronic)", ISSN-L = "0730-0301", bibdate = "Mon Mar 15 09:01:55 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tog/; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/tog.bib", abstract = "We present a new GPU-based rendering system for ray casting of multiple volumes. Our approach supports a large number of volumes, complex translucent and concave polyhedral objects as well as CSG intersections of volumes and geometry in any combination. The system (including the rasterization stage) is implemented entirely in CUDA, which allows full control of the memory hierarchy, in particular access to high bandwidth and low latency shared memory. High depth complexity, which is problematic for conventional approaches based on depth peeling, can be handled successfully. 
As far as we know, our approach is the first framework for multivolume rendering which provides interactive frame rates when concurrently rendering more than 50 arbitrarily overlapping volumes on current graphics hardware.", acknowledgement = ack-nhfb, articleno = "152", fjournal = "ACM Transactions on Graphics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J778", } @Article{Klemm:2009:RTM, author = "Michael Klemm and Matthias Bezold and Stefan Gabriel and Ronald Veldema and Michael Philippsen", title = "Reparallelization techniques for migrating {OpenMP} codes in computational grids", journal = j-CCPE, volume = "21", number = "3", pages = "281--299", day = "10", month = mar, year = "2009", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.1356", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Mon Dec 5 10:08:30 MST 2011", bibsource = "http://www.interscience.wiley.com/jpages/1532-0626; https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Prac\-tice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "6 Aug 2008", } @InProceedings{Klimach:2009:PCH, author = "Harald Klimach and Sabine P. 
Roller", title = "Parallel Coupling of Heterogeneous Domains with {KOP3D} using {PACX-MPI}", crossref = "Tuncer:2009:PCF", volume = "67", pages = "339--345", year = "2009", DOI = "https://doi.org/10.1007/978-3-540-92744-0_42", bibdate = "Sat Dec 22 08:34:16 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncse.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/content/pdf/10.1007/978-3-540-92744-0_42", acknowledgement = ack-nhfb, book-DOI = "https://doi.org/10.1007/978-3-540-92744-0", book-URL = "http://www.springerlink.com/content/978-3-540-92744-0", } @Article{Komatitsch:2009:PHO, author = "Dimitri Komatitsch and David Mich{\'e}a and Gordon Erlebacher", title = "Porting a high-order finite-element earthquake modeling application to {NVIDIA} graphics cards using {CUDA}", journal = j-J-PAR-DIST-COMP, volume = "69", number = "5", pages = "451--460", month = may, year = "2009", CODEN = "JPDCER", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Wed Sep 1 17:08:39 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sciencedirect.com/science/journal/07437315", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @InProceedings{Langdon:2009:FHQ, author = "W. B. 
Langdon", editor = "Franz Rothlauf", booktitle = "{GECCO '09 Proceedings of the 11th Annual Conference Companion on Genetic and Evolutionary Computation Conference: Late Breaking Papers}", title = "A fast high quality pseudo random number generator for {nVidia CUDA}", publisher = pub-ACM, address = pub-ACM:adr, pages = "2511--2513", year = "2009", DOI = "https://doi.org/10.1145/1570256.1570353", ISBN = "1-60558-505-X", ISBN-13 = "978-1-60558-505-5", LCCN = "????", bibdate = "Fri Jan 06 09:34:05 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/prng.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.cs.ucl.ac.uk/staff/W.Langdon/ftp/gp-code/random-numbers/cuda_park-miller.tar.gz", acknowledgement = ack-nhfb, keywords = "GGL generator (LCG(16 807, 0, $2^{31} - 1$))", meetingname = "Proceedings of the 11th annual Conference Companion on Genetic and Evolutionary Computation: July 8--12, 2009, Montreal, Quebec, Canada", } @Article{Lee:2009:OGC, author = "Seyong Lee and Seung-Jai Min and Rudolf Eigenmann", title = "{OpenMP} to {GPGPU}: a compiler framework for automatic translation and optimization", journal = j-SIGPLAN, volume = "44", number = "4", pages = "101--110", month = apr, year = "2009", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1504176.1504194", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 9 08:40:49 MDT 2009", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "GPGPUs have recently emerged as powerful vehicles for general-purpose high-performance computing. Although a new Compute Unified Device Architecture (CUDA) programming model from NVIDIA offers improved programmability for general computing, programming GPGPUs is still complex and error-prone. This paper presents a compiler framework for automatic source-to-source translation of standard OpenMP applications into CUDA-based GPGPU applications. 
The goal of this translation is to further improve programmability and make existing OpenMP applications amenable to execution on GPGPUs. In this paper, we have identified several key transformation techniques, which enable efficient GPU global memory access, to achieve high performance. Experimental results from two important kernels (JACOBI and SPMUL) and two NAS OpenMP Parallel Benchmarks (EP and CG) show that the described translator and compile-time optimizations work well on both regular and irregular applications, leading to performance improvements of up to 50X over the unoptimized translation (up to 328X over serial).", acknowledgement = ack-nhfb, fjournal = "ACM SIG{\-}PLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", keywords = "automatic translation; compiler optimization; CUDA; GPU; OpenMP", } @Article{Ma:2009:CRS, author = "Wenjing Ma and Gagan Agrawal", title = "A compiler and runtime system for enabling data mining applications on {GPUs}", journal = j-SIGPLAN, volume = "44", number = "4", pages = "287--288", month = apr, year = "2009", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1594835.1504218", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 9 08:40:49 MDT 2009", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2000.bib", abstract = "With increasing need for accelerating data mining and scientific data analysis on large data sets, and less chance to improve processor performance by simply increasing clock frequencies, multi-core architectures and accelerators like FPGAs and GPUs have become popular. A recent development in using GPU for general computing has been the release of CUDA (Compute Unified Device Architecture) by NVIDIA. CUDA allows GPU programming with C-language-like features, thus easing the development of non-graphics applications on a GPU. 
However, several challenges still remain in programming the GPUs with CUDA, because CUDA involves explicit parallel programming and management of its complex memory hierarchy, as well as allocating device memory, moving data between CPU and device memory, and specification of thread grid configurations.\par In this paper, we offer a solution for the programmers to generate CUDA code by specifying the sequential reduction loop(s) with some information about the parameters. With program analysis and code generation, the applications are mapped to a GPU. Several additional optimizations are also performed by the middleware.\par We have evaluated our system using three popular data mining applications, k-means clustering, EM clustering, and Principal Component Analysis (PCA). The speedup that each of these applications achieve over a sequential CPU version ranges between 20 and 50.", acknowledgement = ack-nhfb, fjournal = "ACM SIG{\-}PLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", keywords = "CUDA; data mining; GPGPU", } @Article{Marowka:2009:BCT, author = "Ami Marowka", title = "{BSP2OMP}: a Compiler For Translating {BSP} Programs To {OpenMP}", journal = j-INT-J-PAR-EMER-DIST-SYS, volume = "24", number = "4", pages = "293--310", year = "2009", CODEN = "????", ISSN = "1744-5760 (print), 1744-5779 (electronic)", ISSN-L = "1744-5760", bibdate = "Thu Sep 2 08:12:37 MDT 2010", bibsource = "http://www.informaworld.com/smpp/title~content=t713729127~link=cover; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel, Emergent and Distributed Systems", journal-URL = "http://www.tandfonline.com/loi/gpaa20", keywords = "BSP; BSP2OMP; EPCC; multicore; OpenMP", } @Article{Miguel-Alonso:2009:INS, author = "J. Miguel-Alonso and J. Navaridas and F. J.
Ridruejo", title = "Interconnection Network Simulation Using Traces of {MPI} Applications", journal = j-INT-J-PARALLEL-PROG, volume = "37", number = "2", pages = "153--174", month = apr, year = "2009", CODEN = "IJPPE5", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Wed Sep 1 16:06:47 MDT 2010", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=37&issue=2; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=37&issue=2&spage=153", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", } @Article{Ozgun:2009:PCB, author = "Ozlem Ozgun and Raj Mittra and Mustafa Kuzuo{\u{g}}lu", title = "Parallelized Characteristic Basis Finite Element Method ({CBFEM--MPI}) --- a non-iterative domain decomposition algorithm for electromagnetic scattering problems", journal = j-J-COMPUT-PHYS, volume = "228", number = "6", pages = "2225--2238", day = "1", month = apr, year = "2009", CODEN = "JCTPAH", DOI = "https://doi.org/10.1016/j.jcp.2008.12.002", ISSN = "0021-9991 (print), 1090-2716 (electronic)", ISSN-L = "0021-9991", bibdate = "Mon Jan 2 22:14:07 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/jcomputphys2000.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0021999108006293", acknowledgement = ack-nhfb, fjournal = "Journal of Computational Physics", journal-URL = "http://www.sciencedirect.com/science/journal/00219991", } @Article{Rashti:2009:SAM, author = "Mohammad J. 
Rashti and Ahmad Afsahi", title = "A Speculative and Adaptive {MPI} Rendezvous Protocol Over {RDMA}-enabled Interconnects", journal = j-INT-J-PARALLEL-PROG, volume = "37", number = "2", pages = "223--246", month = apr, year = "2009", CODEN = "IJPPE5", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Wed Sep 1 16:06:47 MDT 2010", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=37&issue=2; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=37&issue=2&spage=223", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", } @Article{Schneider:2009:CPM, author = "Scott Schneider and Jae-Seung Yeom and Benjamin Rose and John C. Linford and Adrian Sandu and Dimitrios S. Nikolopoulos", title = "A comparison of programming models for multiprocessors with explicitly managed memory hierarchies", journal = j-SIGPLAN, volume = "44", number = "4", pages = "131--140", month = apr, year = "2009", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1594835.1504197", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 9 08:40:49 MDT 2009", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "On multiprocessors with explicitly managed memory hierarchies (EMM), software has the responsibility of moving data in and out of fast local memories. This task can be complex and error-prone even for expert programmers. Before we can allow compilers to handle this complexity for us, we must identify the abstractions that are general enough to allow us to write applications with reasonable effort, yet specific enough to exploit the vast on-chip memory bandwidth of EMM multi-processors. 
To this end, we compare two programming models against hand-tuned codes on the STI Cell, paying attention to programmability and performance. The first programming model, Sequoia, abstracts the memory hierarchy as private address spaces, each corresponding to a parallel task. The second, Cellgen, is a new framework which provides OpenMP-like semantics and the abstraction of a shared address space divided into private and shared data. We compare three applications programmed using these models against their hand-optimized counterparts in terms of abstractions, programming complexity, and performance.", acknowledgement = ack-nhfb, fjournal = "ACM SIG{\-}PLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", keywords = "cell be; explicitly managed memory hierarchies; programming models", } @Article{Schwarz:2009:GFG, author = "Michael Schwarz and Marc Stamminger", title = "{GPU}: Fast {GPU}-based Adaptive Tessellation with {CUDA}", journal = j-CGF, volume = "28", number = "2", pages = "365--374", month = apr, year = "2009", CODEN = "CGFODY", DOI = "https://doi.org/10.1111/j.1467-8659.2009.01376.x", ISSN = "0167-7055 (print), 1467-8659 (electronic)", ISSN-L = "0167-7055", bibdate = "Sat May 11 13:27:16 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/cgf.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Com{\-}pu{\-}ter Graphics Forum", journal-URL = "http://onlinelibrary.wiley.com/journal/10.1111/(ISSN)1467-8659/", onlinedate = "27 Mar 2009", } @Article{Tabakin:2009:QPE, author = "Frank Tabakin and Bruno Juli{\'a}-D{\'\i}az", title = "{QCMPI}: a parallel environment for quantum computing", journal = j-COMP-PHYS-COMM, volume = "180", number = "6", pages = "948--964", month = jun, year = "2009", CODEN = "CPHCBZ", DOI = "https://doi.org/10.1016/j.cpc.2008.11.021", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Mon Feb 13 23:42:42 MST 2012", bibsource = 
"https://www.math.utah.edu/pub/tex/bib/compphyscomm2000.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465508004141", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Tallent:2009:EPM, author = "Nathan R. Tallent and John M. Mellor-Crummey", title = "Effective performance measurement and analysis of multithreaded applications", journal = j-SIGPLAN, volume = "44", number = "4", pages = "229--240", month = apr, year = "2009", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1504176.1504210", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 9 08:40:49 MDT 2009", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "Understanding why the performance of a multithreaded program does not improve linearly with the number of cores in a shared-memory node populated with one or more multicore processors is a problem of growing practical importance. This paper makes three contributions to performance analysis of multithreaded programs. First, we describe how to measure and attribute {\em parallel idleness}, namely, where threads are stalled and unable to work. This technique applies broadly to programming models ranging from explicit threading ({\em e.g.}, Pthreads) to higher-level models such as Cilk and OpenMP. Second, we describe how to measure and attribute {\em parallel overhead\/} -- when a thread is performing miscellaneous work other than executing the user's computation. By employing a combination of compiler support and post-mortem analysis, we incur no measurement cost beyond normal profiling to glean this information. 
Using {\em idleness\/} and {\em overhead\/} metrics enables one to pinpoint areas of an application where concurrency should be increased (to reduce idleness), decreased (to reduce overhead), or where the present parallelization is hopeless (where idleness and overhead are both high). Third, we describe how to measure and attribute arbitrary performance metrics for high-level multithreaded programming models, such as Cilk. This requires bridging the gap between the expression of logical concurrency in programs and its realization at run-time as it is adaptively partitioned and scheduled onto a pool of threads. We have prototyped these ideas in the context of Rice University's HPCToolkit performance tools. We describe our approach, implementation, and experiences applying this approach to measure and attribute work, idleness, and overhead in executions of Cilk programs.", acknowledgement = ack-nhfb, fjournal = "ACM SIG{\-}PLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", keywords = "call path profiling; hpctoolkit; multithreaded programming models; performance analysis", } @Article{Thakur:2009:TSE, author = "Rajeev Thakur and William Gropp", title = "Test suite for evaluating performance of multithreaded {MPI} communication", journal = j-PARALLEL-COMPUTING, volume = "35", number = "12", pages = "608--617", month = dec, year = "2009", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Thu Sep 2 17:51:11 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Tournavitis:2009:THA, author = "Georgios Tournavitis and Zheng Wang and Bj{\"o}rn Franke and Michael F. P. 
O'Boyle", title = "Towards a holistic approach to auto-parallelization: integrating profile-driven parallelism detection and machine-learning based mapping", journal = j-SIGPLAN, volume = "44", number = "6", pages = "177--187", month = jun, year = "2009", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1542476.1542496", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Jun 16 14:41:16 MDT 2009", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "Compiler-based auto-parallelization is a much studied area, yet has still not found wide-spread application. This is largely due to the poor exploitation of application parallelism, subsequently resulting in performance levels far below those which a skilled expert programmer could achieve. We have identified two weaknesses in traditional parallelizing compilers and propose a novel, integrated approach, resulting in significant performance improvements of the generated parallel code. Using profile-driven parallelism detection we overcome the limitations of static analysis, enabling us to identify more application parallelism and only rely on the user for final approval. In addition, we replace the traditional target-specific and inflexible mapping heuristics with a machine-learning based prediction mechanism, resulting in better mapping decisions while providing more scope for adaptation to different target architectures. We have evaluated our parallelization strategy against the NAS and SPEC OMP benchmarks and two different multi-core platforms (dual quad-core Intel Xeon SMP and dual-socket QS20 Cell blade). We demonstrate that our approach not only yields significant improvements when compared with state-of-the-art parallelizing compilers, but comes close to and sometimes exceeds the performance of manually parallelized codes. 
On average, our methodology achieves 96\% of the performance of the hand-tuned OpenMP NAS and SPEC parallel benchmarks on the Intel Xeon platform and gains a significant speedup for the IBM Cell platform, demonstrating the potential of profile-guided and machine-learning based parallelization for complex multi-core platforms.", acknowledgement = ack-nhfb, fjournal = "ACM SIG{\-}PLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", keywords = "auto-parallelization; machine-learning based parallelism mapping; OpenMP; profile-driven parallelism detection", } @Article{Udupa:2009:SES, author = "Abhishek Udupa and R. Govindarajan and Matthew J. Thazhuthaveetil", title = "Synergistic execution of stream programs on multicores with accelerators", journal = j-SIGPLAN, volume = "44", number = "7", pages = "99--108", month = jul, year = "2009", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1542452.1542466", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Jun 26 12:07:39 MDT 2009", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2000.bib", abstract = "The StreamIt programming model has been proposed to exploit parallelism in streaming applications on general purpose multicore architectures. The StreamIt graphs describe task, data and pipeline parallelism which can be exploited on accelerators such as Graphics Processing Units (GPUs) or CellBE which support abundant parallelism in hardware.\par In this paper, we describe a novel method to orchestrate the execution of a StreamIt program on a multicore platform equipped with an accelerator. The proposed approach identifies, using profiling, the relative benefits of executing a task on the superscalar CPU cores and the accelerator. 
We formulate the problem of partitioning the work between the CPU cores and the GPU, taking into account the latencies for data transfers and the required buffer layout transformations associated with the partitioning, as an integrated Integer Linear Program (ILP) which can then be solved by an ILP solver. We also propose an efficient heuristic algorithm for the work partitioning between the CPU and the GPU, which provides solutions which are within 9.05\% of the optimal solution on an average across the benchmark suite. The partitioned tasks are then software pipelined to execute on the multiple CPU cores and the Streaming Multiprocessors (SMs) of the GPU. The software pipelining algorithm orchestrates the execution between CPU cores and the GPU by emitting the code for the CPU and the GPU, and the code for the required data transfers. Our experiments on a platform with 8 CPU cores and a GeForce 8800 GTS 512 GPU show a geometric mean speedup of 6.84X with a maximum of 51.96X over a single threaded CPU execution across the StreamIt benchmarks. This is a 18.9\% improvement over a partitioning strategy that maps only the filters that cannot be executed on the GPU -- the filters with state that is persistent across firings -- onto the CPU.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", keywords = "CUDA; GPU programming; partitioning; software pipelining; stream programming", } @Article{Vo:2009:FVP, author = "Anh Vo and Sarvani Vakkalanka and Michael DeLisi and Ganesh Gopalakrishnan and Robert M. 
Kirby and Rajeev Thakur", title = "Formal verification of practical {MPI} programs", journal = j-SIGPLAN, volume = "44", number = "4", pages = "261--270", month = apr, year = "2009", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1594835.1504214", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 9 08:40:49 MDT 2009", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "This paper considers the problem of formal verification of MPI programs operating under a fixed test harness for safety properties without building verification models. In our approach, we directly model-check the MPI/C source code, executing its interleavings with the help of a verification scheduler. Unfortunately, the total feasible number of interleavings is exponential, and impractical to examine even for our modest goals. Our earlier publications formalized and implemented a partial order reduction approach that avoided exploring equivalent interleavings, and presented a verification tool called ISP. This paper presents algorithmic and engineering innovations to ISP, including the use of OpenMP parallelization, that now enables it to handle practical MPI programs, including: (i) ParMETIS --- a widely used hypergraph partitioner, and (ii) MADRE --- a Memory Aware Data Re-distribution Engine, both developed outside our group.
Over these benchmarks, ISP has automatically verified up to 14K lines of MPI/C code, producing error traces of deadlocks and assertion violations within seconds.", acknowledgement = ack-nhfb, fjournal = "ACM SIG{\-}PLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", keywords = "distributed programming; dynamic partial order reduction; message passing interface; model checking; MPI", } @Article{Walters:2009:RBF, author = "John Paul Walters and Vipin Chaudhary", title = "Replication-Based Fault Tolerance for {MPI} Applications", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "20", number = "7", pages = "997--1010", month = jul, year = "2009", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/TPDS.2008.172", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Thu May 13 12:06:56 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "IEEE Transactions on Parallel and Distributed Systems", journal-URL = "http://www.computer.org/tpds/archives.htm", } @Article{Wang:2009:MPM, author = "Zheng Wang and Michael F. P. O'Boyle", title = "Mapping parallelism to multi-cores: a machine learning based approach", journal = j-SIGPLAN, volume = "44", number = "4", pages = "75--84", month = apr, year = "2009", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1504176.1504189", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 9 08:40:49 MDT 2009", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "The efficient mapping of program parallelism to multi-core processors is highly dependent on the underlying architecture. This paper proposes a portable and automatic compiler-based approach to mapping such parallelism using machine learning. It develops two predictors: a data sensitive and a data insensitive predictor to select the best mapping for parallel programs. 
They predict the number of threads and the scheduling policy for any given program using a model learnt off-line. By using low-cost profiling runs, they predict the mapping for a new unseen program across multiple input data sets. We evaluate our approach by selecting parallelism mapping configurations for OpenMP programs on two representative but different multi-core platforms (the Intel Xeon and the Cell processors). Performance of our technique is stable across programs and architectures. On average, it delivers above 96\% performance of the maximum available on both platforms. It achieves, on average, a 37\% (up to 17.5 {\em times\/}) performance improvement over the OpenMP runtime default scheme on the Cell platform. Compared to two recent prediction models, our predictors achieve better performance with a significant lower profiling cost.", acknowledgement = ack-nhfb, fjournal = "ACM SIG{\-}PLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", keywords = "artificial neural networks; compiler optimization; machine learning; performance modeling; support vector machine", } @Article{Xue:2009:MSR, author = "Ruini Xue and Xuezheng Liu and Ming Wu and Zhenyu Guo and Wenguang Chen and Weimin Zheng and Zheng Zhang and Geoffrey Voelker", title = "{MPIWiz}: subgroup reproducible replay of {MPI} applications", journal = j-SIGPLAN, volume = "44", number = "4", pages = "251--260", month = apr, year = "2009", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1504176.1504213", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Oct 9 08:40:49 MDT 2009", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "Message Passing Interface (MPI) is a widely used standard for managing coarse-grained concurrency on distributed computers.
Debugging parallel MPI applications, however, has always been a particularly challenging task due to their high degree of concurrent execution and non-deterministic behavior. Deterministic replay is a potentially powerful technique for addressing these challenges, with existing MPI replay tools adopting either data-replay or order-replay approaches. Unfortunately, each approach has its tradeoffs. Data-replay generates substantial log sizes by recording every communication message. Order-replay generates small logs, but requires all processes to be replayed together. We believe that these drawbacks are the primary reasons that inhibit the wide adoption of deterministic replay as the critical enabler of cyclic debugging of MPI applications.\par This paper describes {\em subgroup reproducible replay\/} (SRR), a hybrid deterministic replay method that provides the benefits of both data-replay and order-replay while balancing their trade-offs. SRR divides all processes into disjoint groups. It records the contents of messages crossing group boundaries as in data-replay, but records just message orderings for communication within a group as in order-replay. In this way, SRR can exploit the communication locality of traffic patterns in MPI applications. During replay, developers can then replay each group individually. SRR reduces recording overhead by not recording intra-group communication, and reduces replay overhead by limiting the size of each replay group. Exposing these tradeoffs gives the user the necessary control for making deterministic replay practical for MPI applications.\par We have implemented a prototype, MPIWiz, to demonstrate and evaluate SRR. MPIWiz employs a replay framework that allows transparent binary instrumentation of both library and system calls. As a result, MPIWiz replays MPI applications with no source code modification and relinking, and handles non-determinism in both MPI and OS system calls. 
Our preliminary results show that MPIWiz can reduce recording overhead by over a factor of four relative to data-replay, yet without requiring the entire application to be replayed as in order-replay. Recording increases execution time by 27\% while the application can be replayed in just 53\% of its base execution time.", acknowledgement = ack-nhfb, fjournal = "ACM SIG{\-}PLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", keywords = "distributed debugging; message passing interface; non-determinism; record and replay", } @Article{Yang:2009:DBM, author = "Chao-Tung Yang and Kuan-Chou Lai", title = "A directive-based {MPI} code generator for {Linux PC} clusters", journal = j-J-SUPERCOMPUTING, volume = "50", number = "2", pages = "177--207", month = nov, year = "2009", CODEN = "JOSUED", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Wed Aug 25 08:38:43 MDT 2010", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=50&issue=2; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=50&issue=2&spage=177", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @InProceedings{Yilmaz:2009:HPC, author = "E. Yilmaz and R. U. Payli and H. U. Akay and A. 
Ecer",
  title =        "Hybrid Parallelism for {CFD} Simulations: Combining {MPI} with {OpenMP}",
  crossref =     "Tuncer:2009:PCF",
  volume =       "67",
  pages =        "401--408",
  year =         "2009",
  DOI =          "https://doi.org/10.1007/978-3-540-92744-0_50",
  bibdate =      "Sat Dec 22 08:34:16 MST 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/lncse.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/content/pdf/10.1007/978-3-540-92744-0_50",
  acknowledgement = ack-nhfb,
  book-DOI =     "https://doi.org/10.1007/978-3-540-92744-0",
  book-URL =     "http://www.springerlink.com/content/978-3-540-92744-0",
}

%%% Alejandro Duran and Roger Ferrer are two separate authors; each name
%%% in a BibTeX author list must be separated by the keyword `and'.
@Article{Ayguade:2010:EOS,
  author =       "Eduard Ayguad{\'e} and Rosa M. Badia and Pieter Bellens and Daniel Cabrera and Alejandro Duran and Roger Ferrer and Marc Gonz{\'a}lez and Francisco Igual and Daniel Jim{\'e}nez-Gonz{\'a}lez and Jes{\'u}s Labarta and Luis Martinell and Xavier Martorell and Rafael Mayo and Josep M. P{\'e}rez and Judit Planas and Enrique S. Quintana-Ort{\'\i}",
  title =        "Extending {OpenMP} to Survive the Heterogeneous Multi-Core Era",
  journal =      j-INT-J-PARALLEL-PROG,
  volume =       "38",
  number =       "5--6",
  pages =        "440--459",
  month =        oct,
  year =         "2010",
  CODEN =        "IJPPE5",
  ISSN =         "0885-7458 (print), 1573-7640 (electronic)",
  ISSN-L =       "0885-7458",
  bibdate =      "Wed Sep 1 16:06:49 MDT 2010",
  bibsource =    "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=38&issue=5; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=38&issue=5&spage=440",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of Parallel Programming",
  journal-URL =  "http://link.springer.com/journal/10766",
}

@Article{Baghsorkhi:2010:APM,
  author =       "Sara S. Baghsorkhi and Matthieu Delahaye and Sanjay J. Patel and William D. Gropp and Wen-mei W.
Hwu", title = "An adaptive performance modeling tool for {GPU} architectures", journal = j-SIGPLAN, volume = "45", number = "5", pages = "105--114", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1693453.1693470", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper presents an analytical model to predict the performance of general-purpose applications on a GPU architecture. The model is designed to provide performance information to an auto-tuning compiler and assist it in narrowing down the search to the more promising implementations. It can also be incorporated into a tool to help programmers better assess the performance bottlenecks in their code. We analyze each GPU kernel and identify how the kernel exercises major GPU microarchitecture features. To identify the performance bottlenecks accurately, we introduce an abstract interpretation of a GPU kernel, {\em work flow graph}, based on which we estimate the execution time of a GPU kernel. We validated our performance model on the NVIDIA GPUs using CUDA (Compute Unified Device Architecture). For this purpose, we used data parallel benchmarks that stress different GPU microarchitecture events such as uncoalesced memory accesses, scratch-pad memory bank conflicts, and control flow divergence, which must be accurately modeled but represent challenges to the analytical performance models. The proposed model captures full system complexity and shows high accuracy in predicting the performance trends of different optimized kernel implementations.
We also describe our approach to extracting the performance model automatically from a kernel code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", keywords = "analytical model; GPU; parallel programming; performance estimation", } @Article{Balaji:2010:FGM, author = "Pavan Balaji and Darius Buntinas and David Goodell and William Gropp and Rajeev Thakur", title = "Fine-Grained Multithreading Support for Hybrid Threaded {MPI} Programming", journal = j-IJHPCA, volume = "24", number = "1", pages = "49--57", month = feb, year = "2010", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342009360206", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Tue Aug 31 09:59:45 MDT 2010", bibsource = "http://hpc.sagepub.com/content/24/1.toc; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://hpc.sagepub.com/content/24/1/49.full.pdf+html", acknowledgement = ack-nhfb, journal-URL = "http://hpc.sagepub.com/content/by/year", } @Article{Balaji:2010:IND, author = "Pavan Balaji and Anthony Chan and William Gropp and Rajeev Thakur and Ewing Lusk", title = "The Importance of Non-Data-Communication Overheads in {MPI}", journal = j-IJHPCA, volume = "24", number = "1", pages = "5--15", month = feb, year = "2010", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342009359258", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Tue Aug 31 09:59:45 MDT 2010", bibsource = "http://hpc.sagepub.com/content/24/1.toc; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://hpc.sagepub.com/content/24/1/5.full.pdf+html", acknowledgement = ack-nhfb, journal-URL = "http://hpc.sagepub.com/content/by/year", } @Article{Blas:2010:IEF, author = "Javier Garcia Blas and Florin Isaila and Jesus Carretero and David Singh and Felix Garcia-Carballeira", title = "Implementation and Evaluation of File Write-Back and Prefetching for {MPI-IO} Over 
{GPFS}", journal = j-IJHPCA, volume = "24", number = "1", pages = "78--92", month = feb, year = "2010", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342009359015", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Tue Aug 31 09:59:45 MDT 2010", bibsource = "http://hpc.sagepub.com/content/24/1.toc; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://hpc.sagepub.com/content/24/1/78.full.pdf+html", acknowledgement = ack-nhfb, journal-URL = "http://hpc.sagepub.com/content/by/year", } @Article{Brightwell:2010:EDA, author = "Ron Brightwell", title = "Exploiting Direct Access Shared Memory for {MPI} on Multi-Core Processors", journal = j-IJHPCA, volume = "24", number = "1", pages = "69--77", month = feb, year = "2010", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342009359014", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Tue Aug 31 09:59:45 MDT 2010", bibsource = "http://hpc.sagepub.com/content/24/1.toc; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://hpc.sagepub.com/content/24/1/69.full.pdf+html", acknowledgement = ack-nhfb, journal-URL = "http://hpc.sagepub.com/content/by/year", } @Article{Broquedis:2010:FEO, author = "Fran{\c{c}}ois Broquedis and Nathalie Furmento and Brice Goglin and Pierre-Andr{\'e} Wacrenier and Raymond Namyst", title = "{ForestGOMP}: An Efficient {OpenMP} Environment for {NUMA} Architectures", journal = j-INT-J-PARALLEL-PROG, volume = "38", number = "5--6", pages = "418--439", month = oct, year = "2010", CODEN = "IJPPE5", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Wed Sep 1 16:06:49 MDT 2010", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=38&issue=5; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=38&issue=5&spage=418", acknowledgement = ack-nhfb, fjournal = 
"International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", } @Article{Bull:2010:PEM, author = "J. Mark Bull and James Enright and Xu Guo and Chris Maynard and Fiona Reid", title = "Performance Evaluation of Mixed-Mode {OpenMP\slash MPI} Implementations", journal = j-INT-J-PARALLEL-PROG, volume = "38", number = "5--6", pages = "396--417", month = oct, year = "2010", CODEN = "IJPPE5", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Wed Sep 1 16:06:49 MDT 2010", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=38&issue=5; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=38&issue=5&spage=396", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", } @Article{Campanoni:2010:HFP, author = "Simone Campanoni and Giovanni Agosta and Stefano Crespi Reghizzi and Andrea Di Biagio", title = "A highly flexible, parallel virtual machine: design and experience of {ILDJIT}", journal = j-SPE, volume = "40", number = "2", pages = "177--207", day = "??", month = feb, year = "2010", CODEN = "SPEXBL", DOI = "https://doi.org/10.1002/spe.950", ISSN = "0038-0644 (print), 1097-024X (electronic)", ISSN-L = "0038-0644", bibdate = "Wed Mar 17 10:16:22 MDT 2010", bibsource = "http://www.interscience.wiley.com/jpages/0038-0644; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www3.interscience.wiley.com/journalfinder.html", acknowledgement = ack-nhfb, fjournal = "Soft{\-}ware\emdash Prac{\-}tice and Experience", journal-URL = "http://onlinelibrary.wiley.com/journal/10.1002/(ISSN)1097-024X", onlinedate = "Jan 14 2010 4:49AM", } @Article{Cardoso:2010:MSO, author = "M. C. Cardoso and F. M. 
Costa", title = "{MPI} support on opportunistic grids based on the {InteGrade} middleware", journal = j-CCPE, volume = "22", number = "3", pages = "343--357", day = "10", month = mar, year = "2010", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.1479", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Mon Dec 5 10:08:41 MST 2011", bibsource = "http://www.interscience.wiley.com/jpages/1532-0626; https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Prac\-tice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "21 Sep 2009", } @Article{Carter:2010:PLN, author = "John D. Carter and William B. Gardner and Gary Grewal", title = "The {Pilot} library for novice {MPI} programmers", journal = j-SIGPLAN, volume = "45", number = "5", pages = "351--352", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837853.1693509", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "The Pilot library is a new method for programming MPI-enabled clusters in C, targeted at novice parallel programmers. Formal elements from Communicating Sequential Processes (CSP) are used to realize a process/channel model of parallel computation that reduces opportunities for deadlock and other communication errors. This simple model, plus an application programming interface (API) styled after C's formatted I/O, are designed to make the library easy to learn.
The Pilot library exists as a thin layer on top of any standard Message Passing Interface (MPI) implementation, preserving MPI's portability and efficiency, with little performance overhead arising as result of Pilot's additional features.", acknowledgement = ack-nhfb, fjournal = "ACM SIG{\-}PLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", keywords = "C; cluster programming; collective operations; deadlock detection; high-performance computing; MPI", } @Article{Casas:2010:APD, author = "Marc Casas and Rosa M. Badia and Jes{\'u}s Labarta", title = "Automatic Phase Detection and Structure Extraction of {MPI} Applications", journal = j-IJHPCA, volume = "24", number = "3", pages = "335--360", month = aug, year = "2010", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342009360039", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Tue Aug 31 09:59:46 MDT 2010", bibsource = "http://hpc.sagepub.com/content/24/3.toc; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://hpc.sagepub.com/content/24/3/335.full.pdf+html", acknowledgement = ack-nhfb, journal-URL = "http://hpc.sagepub.com/content/by/year", } @Article{Cheng:2010:BRBb, author = "Jie Cheng", title = "Book Review: {{\booktitle{CUDA by Example: An Introduction to General-Purpose GPU Programming}}, by Jason Sanders and Edward Kandrot, ISBN-13 978-0-13-138768-3}", journal = j-SCPE, volume = "11", number = "4", pages = "401--401", month = dec, year = "2010", CODEN = "????", ISSN = "1895-1767", ISSN-L = "1895-1767", bibdate = "Sat Nov 10 09:03:30 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/scpe.bib; http://www.scpe.org/index.php/scpe/issue/view/91", note = "See \cite{Sanders:2010:CEI}.", URL = "http://www.scpe.org/index.php/scpe/article/view/663", acknowledgement = ack-nhfb, remark = "Special Issue: Network Management in Distributed Systems.", } @Article{Cho:2010:OPP, author = "S. 
M. Cho and D. W. Im and O. Y. Jang and H. J. Song and B. D. Paulovicks and V. Sheinin and H. Yeo",
  title =        "{OpenCL} and parallel primitives for digital {TV} applications",
  journal =      j-IBM-JRD,
  volume =       "54",
  number =       "5",
  pages =        "7:1--7:14",
  month =        "????",
  year =         "2010",
  CODEN =        "IBMJAE",
  DOI =          "https://doi.org/10.1147/JRD.2010.2062050",
  ISSN =         "0018-8646 (print), 2151-8556 (electronic)",
  ISSN-L =       "0018-8646",
  bibdate =      "Sun Feb 20 14:29:19 MST 2011",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/ibmjrd.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.research.ibm.com/journal/",
  acknowledgement = ack-nhfb,
  fjournal =     "IBM Journal of Research and Development",
  journal-URL =  "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5288520",
}

%%% ISSN values below are those of Computing in Science and Engineering
%%% (CODEN CSENFA); the print ISSN must agree with the ISSN-L field.
@Article{Chou:2010:CMI,
  author =       "Yu-Cheng Chou and Stephen S. Nestinger and Harry H. Cheng",
  title =        "{Ch MPI}: Interpretive Parallel Computing in {C}",
  journal =      j-COMPUT-SCI-ENG,
  volume =       "12",
  number =       "2",
  pages =        "54--67",
  month =        mar # "\slash " # apr,
  year =         "2010",
  CODEN =        "CSENFA",
  DOI =          "https://doi.org/10.1109/MCSE.2010.36",
  ISSN =         "1521-9615 (print), 1558-366X (electronic)",
  ISSN-L =       "1521-9615",
  bibdate =      "Thu May 13 11:08:14 2010",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Computing in Science and Engineering",
  journal-URL =  "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5992",
}

@Article{Dickens:2010:HPI,
  author =       "Phillip M.
Dickens and Jeremy Logan", title = "A high performance implementation of {MPI-IO} for a {Lustre} file system environment", journal = j-CCPE, volume = "22", number = "11", pages = "1433--1449", day = "10", month = aug, year = "2010", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.1491", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Mon Dec 5 10:08:46 MST 2011", bibsource = "http://www.interscience.wiley.com/jpages/1532-0626; https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Prac\-tice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "8 Sep 2009", } @TechReport{Du:2010:COT, author = "Peng Du and Rick Weber and Piotr Luszczek and Stanimire Tomov and Gregory Peterson and Jack Dongarra", title = "From {CUDA} to {OpenCL}: Towards a Performance-portable Solution for Multi-platform {GPU} Programming", type = "LAPACK Working Note", number = "228", institution = inst-UTK-CS, address = inst-UTK-CS:adr, day = "6", month = sep, year = "2010", bibdate = "Wed Aug 24 12:36:41 MDT 2011", bibsource = "https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "UT-CS-10-656.", URL = "http://www.netlib.org/lapack/lawnspdf/lawn228.pdf", acknowledgement = ack-nhfb, } @Article{FerreiradaSilva:2010:PBC, author = "Adelino {Ferreira da Silva}", title = "\pkg{cudaBayesreg}: {Bayesian} Computation in {CUDA}", journal = j-R-JOURNAL, volume = "2", number = "2", pages = "48--55", month = dec, year = "2010", CODEN = "????", ISSN = "2073-4859", bibdate = "Thu Aug 13 15:54:57 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/rjournal.bib", URL = "http://journal.r-project.org/archive/2010-2/RJournal_2010-2_Ferreira~da-Silva.pdf", acknowledgement = ack-r-project, fjournal = "The 
R Journal", journal-URL = "http://journal.r-project.org/", } @Article{Gelado:2010:ADS, author = "Isaac Gelado and Javier Cabezas and Nacho Navarro and John E. Stone and Sanjay Patel and Wen-mei W. Hwu", title = "An asymmetric distributed shared memory model for heterogeneous parallel systems", journal = j-SIGPLAN, volume = "45", number = "3", pages = "347--358", month = mar, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1735970.1736059", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Mar 17 13:46:56 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/linux.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib; https://www.math.utah.edu/pub/tex/bib/unix.bib", abstract = "Heterogeneous computing combines general purpose CPUs with accelerators to efficiently execute both sequential control-intensive and data-parallel phases of applications. Existing programming models for heterogeneous computing rely on programmers to explicitly manage data transfers between the CPU system memory and accelerator memory.\par This paper presents a new programming model for heterogeneous computing, called Asymmetric Distributed Shared Memory (ADSM), that maintains a shared logical memory space for CPUs to access objects in the accelerator physical memory but not vice versa. The asymmetry allows light-weight implementations that avoid common pitfalls of symmetrical distributed shared memory systems. ADSM allows programmers to assign data objects to performance critical methods. When a method is selected for accelerator execution, its associated data objects are allocated within the shared logical memory space, which is hosted in the accelerator physical memory and transparently accessible by the methods executed on CPUs.\par We argue that ADSM reduces programming efforts for heterogeneous computing systems and enhances application portability. 
We present a software implementation of ADSM, called GMAC, on top of CUDA in a GNU/Linux environment. We show that applications written in ADSM and running on top of GMAC achieve performance comparable to their counterparts using programmer-managed data transfers. This paper presents the GMAC system and evaluates different design choices. We further suggest additional architectural support that will likely allow GMAC to achieve higher application performance than the current CUDA model.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", keywords = "asymmetric distributed shared memory; data-centric programming models; heterogeneous systems", } @Article{Granat:2010:PSS, author = "Robert Granat and Bo K{\aa}gstr{\"o}m", title = "Parallel Solvers for {Sylvester}-Type Matrix Equations with Applications in Condition Estimation, {Part I}: Theory and Algorithms", journal = j-TOMS, volume = "37", number = "3", pages = "32:1--32:32", month = sep, year = "2010", CODEN = "ACMSCU", DOI = "https://doi.org/10.1145/1824801.1824810", ISSN = "0098-3500 (print), 1557-7295 (electronic)", ISSN-L = "0098-3500", bibdate = "Mon Sep 27 10:15:50 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "Parallel ScaLAPACK-style algorithms for solving eight common standard and generalized Sylvester-type matrix equations and various sign and transposed variants are presented. All algorithms are blocked variants based on the Bartels--Stewart method and involve four major steps: reduction to triangular form, updating the right-hand side with respect to the reduction, computing the solution to the reduced triangular problem, and transforming the solution back to the original coordinate system. Novel parallel algorithms for solving reduced triangular matrix equations based on wavefront-like traversal of the right-hand side matrices are presented together with a generic scalability analysis.
These algorithms are used in condition estimation and new robust parallel sep$^{-1}$-estimators are developed. Experimental results from three parallel platforms, including results from a mixed OpenMP/MPI platform, are presented and analyzed using several performance and accuracy metrics. The analysis includes results regarding general and triangular parallel solvers as well as parallel condition estimators.", acknowledgement = ack-nhfb, articleno = "32", fjournal = "ACM Transactions on Mathematical Software", journal-URL = "http://dl.acm.org/pub.cfm?id=J782", keywords = "condition estimation; Eigenvalue problems; library software; Sylvester matrix equations", } @Article{Gutierrez:2010:QCS, author = "Eladio Guti{\'e}rrez and Sergio Romero and Mar{\'\i}a A. Trenas and Emilio L. Zapata", title = "Quantum computer simulation using the {CUDA} programming model", journal = j-COMP-PHYS-COMM, volume = "181", number = "2", pages = "283--300", month = feb, year = "2010", CODEN = "CPHCBZ", DOI = "https://doi.org/10.1016/j.cpc.2009.09.021", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Sat Feb 11 09:54:27 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465509003117", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Hadjidoukas:2010:NOP, author = "Panagiotis E.
Hadjidoukas and Laurent Amsaleg", title = "Nested {OpenMP} Parallelization of a Hierarchical Data Clustering Algorithm", journal = j-PARALLEL-PROCESS-LETT, volume = "20", number = "2", pages = "187--208", month = jun, year = "2010", CODEN = "PPLTEE", DOI = "https://doi.org/10.1142/S0129626410000144", ISSN = "0129-6264 (print), 1793-642X (electronic)", bibdate = "Thu Sep 2 09:08:12 MDT 2010", bibsource = "http://ejournals.wspc.com.sg/ppl/; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Parallel Processing Letters", journal-URL = "http://www.worldscientific.com/loi/ppl", } @Article{Hamid:2010:CMB, author = "Nor Asilah Wati Abdul Hamid and Paul Coddington", title = "Comparison of {MPI} Benchmark Programs on Shared Memory and Distributed Memory Machines (Point-to-Point Communication)", journal = j-IJHPCA, volume = "24", number = "4", pages = "469--483", month = nov, year = "2010", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342010371106", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Tue Sep 6 15:14:35 MDT 2011", bibsource = "http://hpc.sagepub.com/content/24/4.toc; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://hpc.sagepub.com/content/24/4/469.full.pdf+html", acknowledgement = ack-nhfb, fjournal = "International Journal of High Performance Computing Applications", journal-URL = "http://hpc.sagepub.com/content/by/year", onlinedate = "June 7, 2010", } @Article{Hawick:2010:PGC, author = "K. A. Hawick and A. Leist and D. P. 
Playne", title = "Parallel graph component labelling with {GPUs} and {CUDA}", journal = j-PARALLEL-COMPUTING, volume = "36", number = "12", pages = "655--678", month = dec, year = "2010", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Mon Nov 1 10:18:30 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Hong:2010:IGP, author = "Sunpyo Hong and Hyesoon Kim", title = "An integrated {GPU} power and performance model", journal = j-COMP-ARCH-NEWS, volume = "38", number = "3", pages = "280--289", month = jun, year = "2010", CODEN = "CANED2", DOI = "https://doi.org/10.1145/1816038.1815998", ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)", ISSN-L = "0163-5964", bibdate = "Tue Jul 6 14:11:46 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigarch.bib", abstract = "GPU architectures are increasingly important in the multi-core era due to their high number of parallel processors. Performance optimization for multi-core processors has been a challenge for programmers. Furthermore, optimizing for power consumption is even more difficult. Unfortunately, as a result of the high number of processors, the power consumption of many-core processors such as GPUs has increased significantly.\par Hence, in this paper, we propose an integrated power and performance (IPP) prediction model for a GPU architecture to predict the optimal number of active processors for a given application. The basic intuition is that when an application reaches the peak memory bandwidth, using more cores does not result in performance improvement.\par We develop an empirical power model for the GPU. 
Unlike most previous models, which require measured execution times, hardware performance counters, or architectural simulations, IPP predicts execution times to calculate dynamic power events. We then use the outcome of IPP to control the number of running cores. We also model the increases in power consumption that resulted from the increases in temperature.\par With the predicted optimal number of active cores, we show that we can save up to 22.09\% of runtime GPU energy consumption and on average 10.99\% of that for the five memory bandwidth-limited benchmarks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGARCH Computer Architecture News", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J89", keywords = "analytical model; CUDA; energy; GPU architecture; performance; power estimation", } @Article{Huang:2010:ELA, author = "Lei Huang and Haoqiang Jin and Liqi Yi and Barbara Chapman", title = "Enabling locality-aware computations in {OpenMP}", journal = j-SCI-PROG, volume = "18", number = "3--4", pages = "169--181", month = "????", year = "2010", CODEN = "SCIPEV", DOI = "https://doi.org/10.3233/SPR-2010-0307", ISSN = "1058-9244 (print), 1875-919X (electronic)", ISSN-L = "1058-9244", bibdate = "Tue Dec 13 19:01:33 MST 2011", bibsource = "http://www.iospress.nl/journal/scientific-programming/; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sciprogram.bib", acknowledgement = ack-nhfb, fjournal = "Scientific Programming", journal-URL = "http://iospress.metapress.com/content/1058-9244", } @Article{Isaila:2010:SMP, author = "Florin Isaila and Francisco Javier Garcia Blas and Jes{\'u}s Carretero and Wei-keng Liao and Alok Choudhary", title = "A Scalable {Message Passing Interface} Implementation of an Ad-Hoc Parallel {I/O} system", journal = j-IJHPCA, volume = "24", number = "2", pages = "164--184", month = may, year = "2010", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342009347890", ISSN = "1094-3420 (print),
1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Tue Aug 31 09:59:46 MDT 2010", bibsource = "http://hpc.sagepub.com/content/24/2.toc; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://hpc.sagepub.com/content/24/2/164.full.pdf+html", acknowledgement = ack-nhfb, journal-URL = "http://hpc.sagepub.com/content/by/year", } @Article{Januszewski:2010:ANS, author = "M. Januszewski and M. Kostur", title = "Accelerating numerical solution of stochastic differential equations with {CUDA}", journal = j-COMP-PHYS-COMM, volume = "181", number = "1", pages = "183--188", month = jan, year = "2010", CODEN = "CPHCBZ", DOI = "https://doi.org/10.1016/j.cpc.2009.09.009", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Sat Feb 11 09:54:27 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465509002999", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Jost:2010:EUH, author = "Gabriele Jost and Bob Robins", title = "Experiences using hybrid {MPI\slash OpenMP} in the real world: Parallelization of a {$3$D} {CFD} solver for multi-core node clusters", journal = j-SCI-PROG, volume = "18", number = "3--4", pages = "127--138", month = "????", year = "2010", CODEN = "SCIPEV", DOI = "https://doi.org/10.3233/SPR-2010-0308", ISSN = "1058-9244 (print), 1875-919X (electronic)", ISSN-L = "1058-9244", bibdate = "Tue Dec 13 19:01:33 MST 2011", bibsource = "http://www.iospress.nl/journal/scientific-programming/; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sciprogram.bib", acknowledgement = ack-nhfb, fjournal = "Scientific Programming", journal-URL = "http://iospress.metapress.com/content/1058-9244", } @InProceedings{Kamal:2010:EIN, author = "A. A. Kamal and A. M. 
Youssef", title = "Enhanced implementation of the {NTRUEncrypt} algorithm using graphics cards", crossref = "Chaudhuri:2010:PIC", pages = "168--174", year = "2010", DOI = "https://doi.org/10.1109/PDGC.2010.5679887", bibdate = "Thu Apr 21 10:40:48 2011", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "The NTRU encryption algorithm, also known as NTRUEncrypt, is a parameterized family of lattice-based public key cryptosystems that has been accepted to the IEEE P1363 standards under the specifications for lattice-based public-key cryptography (IEEE P1363.1). The operations of the NTRU encryption algorithm show good characteristics for data parallel processing which makes the NTRU a good candidate to benefit from the high degree of parallelism available in modern graphics processing units (GPUs). In this paper, we investigate different GPU implementation options for the NTRU encryption algorithm. Our implementation, on the NVIDIA GTX275 GPU, using the CUDA framework, achieves about 77 MB/s for NTRU with the parameter set $ (N, q, p) = (1171, 2048, 3) $.", acknowledgement = ack-nhfb, keywords = "ANSI X9.98-2010; NTRUEncrypt", } @Article{Kapinos:2010:PPP, author = "Paul Kapinos and Dieter an Mey", title = "Productivity and Performance Portability of the {OpenMP 3.0} Tasking Concept When Applied to an Engineering Code Written in {Fortran 95}", journal = j-INT-J-PARALLEL-PROG, volume = "38", number = "5--6", pages = "379--395", month = oct, year = "2010", CODEN = "IJPPE5", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Wed Sep 1 16:06:49 MDT 2010", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=38&issue=5; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=38&issue=5&spage=379", acknowledgement = ack-nhfb, fjournal = 
"International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", } @Article{Khanna:2010:NMG, author = "Gaurav Khanna and Justin McKennon", title = "Numerical modeling of gravitational wave sources accelerated by {OpenCL}", journal = j-COMP-PHYS-COMM, volume = "181", number = "9", pages = "1605--1611", month = sep, year = "2010", CODEN = "CPHCBZ", DOI = "https://doi.org/10.1016/j.cpc.2010.05.014", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Sat Feb 11 09:54:30 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465510001682", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Book{Kirk:2010:PMP, author = "David B. Kirk and Wen-mei W. Hwu", title = "Programming Massively Parallel Processors: a Hands-on Approach", publisher = pub-MORGAN-KAUFMANN, address = pub-MORGAN-KAUFMANN:adr, pages = "xviii + 258", year = "2010", ISBN = "0-12-381472-3", ISBN-13 = "978-0-12-381472-2", LCCN = "QA76.642 .K57 2010", bibdate = "Thu Jul 29 13:33:50 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib; https://www.math.utah.edu/pub/tex/bib/master.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/scpe.bib; https://www.math.utah.edu/pub/tex/bib/unix.bib; z3950.bibsys.no:2100/BIBSYS; z3950.loc.gov:7090/Voyager", note = "Chapter 7 (pages 125--140) discusses GPU floating-point considerations.", acknowledgement = ack-nhfb, keywords = "CUDA; nVIDIA", libnote = "Not yet in my library.", subject = "parallel programming (computer science); parallel processing (electronic computers); multiprocessors; computer architecture", tableofcontents = "1: Introduction \\ 1.1 GPUs as Parallel Computers \\ 1.2 Architecture of a Modern 
GPU \\ 1.3 Why More Speed or Parallelism? \\ 1.4 Parallel Programming Languages and Models \\ 1.5 Overarching Goals \\ 1.6 Organization of the Book \\ 2: History of GPU Computing \\ 2.1. Evolution of Graphics Pipelines The Era of Fixed Function Graphics Pipeline Evolution of Programmable Real-Time Graphics Unified Graphics and Computing Processors \\ 2.2. GPGPU: an Intermediate Step Scalable GPUs Recent Developments Future Trends \\ 3: Introduction to CUDA \\ 3.1. Data Parallelism \\ 3.2. CUDA Program Structure \\ 3.3. A Matrix--Matrix Multiplication Example \\ 3.4. Device Memories and Data Transfer \\ 3.5. Kernel Functions and Threading \\ 3.6. Summary Function Declarations Kernel Launch Predefined Variables Runtime API \\ 4: CUDA Threads \\ 4.1. CUDA Thread Organization \\ 4.2. More on BlockIdx and ThreadIdx \\ 4.3. Synchronization and Transparent Scalability \\ 4.4. Thread Assignment \\ 4.5. Thread Scheduling and Latency Tolerance \\ 4.6. Summary \\ 5: CUDA Memories \\ 5.1. Importance of Memory Access Efficiency \\ 5.2. CUDA Device Memory Types \\ 5.3. A Strategy for Reducing Global Memory Traffic \\ 5.4. Memory as a Limiting Factor to Parallelism \\ 5.5. Summary \\ 6: Performance Considerations \\ 6.1. More on Thread Execution \\ 6.2. Global Memory Bandwidth \\ 6.3. Dynamic Partitioning of SM Resources \\ 6.4. Data Prefetching \\ 6.5. Instruction Mix \\ 6.6. Thread Granularity \\ 6.7. Measured Performance and Summary \\ 7: Floating-Point Considerations \\ 7.1. Floating-Point Format Normalized representation of M Excess encoding of E \\ 7.2. Representable Numbers \\ 7.3. Special Bit Patterns and Precision \\ 7.4. Arithmetic Accuracy and Rounding \\ 7.5. Algorithm Considerations \\ 7.6. Summary \\ 8: Application Case Study I \\ Advanced MRI Reconstruction \\ 8.1. Application Background \\ 8.2. Iterative Reconstruction \\ 8.3.
Computing FHd \\ Step 1: Determine the Kernel Parallelism Structure \\ Step 2: Getting Around the Memory Bandwidth Limitation \\ Step 3: Use Hardware Trigonometry Functions \\ Step 4: Experimental Performance Testing \\ 8.4. Final Evaluation \\ 9: Application Case Study II \\ Molecular Visualization and Analysis \\ 9.1. Application Background \\ 9.2. A Simple Kernel Implementation \\ 9.3. Instruction Execution Efficiency \\ 9.4. Memory Coalescing \\ 9.5. Additional Performance Comparisons \\ 9.6. Using Multiple GPUs \\ 10: Parallel Programming and Computational Thinking \\ 10.1. Goals of Parallel Programming \\ 10.2. Problem Decomposition \\ 10.3. Algorithm Selection \\ 10.4. Computational Thinking \\ 11: A Brief Introduction to OpenCL \\ 11.1. Background \\ 11.2. Data Parallelism Model \\ 11.3. Device Architecture \\ 11.4. Kernel Functions \\ 11.5. Device Management and Kernel Launch \\ 11.6. Electrostatic Potential Map in OpenCL \\ 11.7. Summary \\ 12: Conclusion and Future Outlook \\ 12.1. Goals Revisited \\ 12.2. Memory Architecture Evolution \\ 12.3. Kernel Execution Control Evolution \\ 12.4. Core Performance \\ 12.5. Programming Environment \\ 12.6.
A Bright Outlook \\ Appendix A: Matrix Multiplication Example Code \\ Appendix B: Speed and feed of current generation CUDA devices", } @Article{Komatitsch:2010:HOF, author = "Dimitri Komatitsch and Gordon Erlebacher and Dominik G{\"o}ddeke and David Mich{\'e}a", title = "High-order finite-element seismic wave propagation modeling with {MPI} on a large {GPU} cluster", journal = j-J-COMPUT-PHYS, volume = "229", number = "20", pages = "7692--7714", day = "1", month = oct, year = "2010", CODEN = "JCTPAH", DOI = "https://doi.org/10.1016/j.jcp.2010.06.024", ISSN = "0021-9991 (print), 1090-2716 (electronic)", ISSN-L = "0021-9991", bibdate = "Sat Dec 31 11:58:42 MST 2011", bibsource = "https://www.math.utah.edu/pub/tex/bib/jcomputphys2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0021999110003396", acknowledgement = ack-nhfb, fjournal = "Journal of Computational Physics", journal-URL = "http://www.sciencedirect.com/science/journal/00219991", } @Article{Koval:2010:USB, author = "Peter Koval and J. D. 
Talman", title = "Update of spherical {Bessel} transform: {FFTW} and {OpenMP}", journal = j-COMP-PHYS-COMM, volume = "181", number = "12", pages = "2212--2213", month = dec, year = "2010", CODEN = "CPHCBZ", DOI = "https://doi.org/10.1016/j.cpc.2010.08.024", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Sat Feb 11 09:54:31 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465510003188", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Kwon:2010:SPC, author = "Seongnam Kwon and Soonhoi Ha", title = "Serialized parallel code generation framework for {MPSoC}", journal = j-TODAES, volume = "15", number = "2", pages = "11:1--11:??", month = feb, year = "2010", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/1698759.1698761", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Mon Mar 15 11:19:08 MDT 2010", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "The models of computations that express concurrency naturally are preferred for initial specification of MPSoC system, since popular programming languages such as C and C++ are designed for sequential execution. In our previous work, we proposed a design framework where two models are used for the initial specification of the system behavior; task model at the top level and dataflow model inside each task. After the partition and mapping process is performed with each architecture candidate, the target code is automatically generated for both Design-Space Exploration (DSE) and final implementation. In this article, we focus on parallel code generation for MPSoC, proposing two main techniques. 
The first is to express functional and data parallelism differently following the partition and mapping decision. In the proposed technique, the generated code consists of multiple tasks running concurrently, which achieves functional parallelism. On the other hand, we use OpenMP directives to express data parallelism inside a task. Second is to adopt the code serialization technique to execute a multitasking application without OS scheduler, aiming to generate the highly portable code on various platforms for an efficient DSE process. We extend the previous code serialization techniques to multiprocessor systems and utilize the formal properties of the dataflow model for efficient code generation. The experiments including H.263 codec example show the viability of the proposed technique and the efficiency of the generated code.", acknowledgement = ack-nhfb, articleno = "11", fjournal = "ACM Transactions on Design Automation of Electronic Systems (TODAES)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", keywords = "design-space exploration; Embedded software; multiprocessor system on chip; parallel programming; software generation", } @Article{Lastovetsky:2010:RAP, author = "Alexey Lastovetsky and Tahar Kechadi", title = "Recent Advances in {Parallel Virtual Machine} and {Message Passing Interface}", journal = j-IJHPCA, volume = "24", number = "1", pages = "3--4", month = feb, year = "2010", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342009359523", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Tue Aug 31 09:59:45 MDT 2010", bibsource = "http://hpc.sagepub.com/content/24/1.toc; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://hpc.sagepub.com/content/24/1/3.full.pdf+html", acknowledgement = ack-nhfb, journal-URL = "http://hpc.sagepub.com/content/by/year", } @Article{Li:2010:SVC, author = "Guodong Li and Ganesh Gopalakrishnan and Robert M. 
Kirby and Dan Quinlan", title = "A symbolic verifier for {CUDA} programs", journal = j-SIGPLAN, volume = "45", number = "5", pages = "357--358", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837853.1693512", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present a preliminary automated verifier based on mechanical decision procedures which is able to prove functional correctness of CUDA programs and guarantee to detect bugs such as race conditions. We also employ a symbolic partial order reduction (POR) technique to mitigate the interleaving explosion problem.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", keywords = "cuda; formal verification; SPMD; symbolic analysis", } @Article{Lin:2010:TLS, author = "Paul T. Lin and John N. Shadid", title = "Towards large-scale multi-socket, multicore parallel simulations: Performance of an {MPI}-only semiconductor device simulator", journal = j-J-COMPUT-PHYS, volume = "229", number = "19", pages = "6804--6818", day = "20", month = sep, year = "2010", CODEN = "JCTPAH", DOI = "https://doi.org/10.1016/j.jcp.2010.05.023", ISSN = "0021-9991 (print), 1090-2716 (electronic)", ISSN-L = "0021-9991", bibdate = "Sat Dec 31 11:58:37 MST 2011", bibsource = "https://www.math.utah.edu/pub/tex/bib/jcomputphys2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0021999110002846", acknowledgement = ack-nhfb, fjournal = "Journal of Computational Physics", journal-URL = "http://www.sciencedirect.com/science/journal/00219991", } @Article{Liu:2010:RTC, author = "Fuchang Liu and Takahiro Harada and Youngeun Lee and Young J. 
Kim", title = "Real-time collision culling of a million bodies on graphics processing units", journal = j-TOG, volume = "29", number = "6", pages = "154:1--154:??", month = dec, year = "2010", CODEN = "ATGRDF", DOI = "https://doi.org/10.1145/1882261.1866180", ISSN = "0730-0301 (print), 1557-7368 (electronic)", ISSN-L = "0730-0301", bibdate = "Thu Dec 9 11:41:01 MST 2010", bibsource = "http://www.acm.org/pubs/contents/journals/tog/; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/tog.bib", abstract = "We cull collisions between very large numbers of moving bodies using graphics processing units (GPUs). To perform massively parallel sweep-and-prune (SaP), we mitigate the great density of intervals along the axis of sweep by using principal component analysis to choose the best sweep direction, together with spatial subdivisions to further reduce the number of false positive overlaps. Our algorithm implemented entirely on GPUs using the CUDA framework can handle a million moving objects at interactive rates. 
As application of our algorithm, we demonstrate the real-time simulation of very large numbers of particles and rigid-body dynamics.", acknowledgement = ack-nhfb, articleno = "154", fjournal = "ACM Transactions on Graphics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J778", } @Article{Marjanovic:2010:ECC, author = "Vladimir Marjanovic and Jes{\'u}s Labarta and Eduard Ayguad{\'e} and Mateo Valero", title = "Effective communication and computation overlap with hybrid {MPI\slash SMPSs}", journal = j-SIGPLAN, volume = "45", number = "5", pages = "337--338", month = may, year = "2010", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1837853.1693502", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Aug 31 22:39:18 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "Communication overhead is one of the dominant factors affecting performance in high-performance computing systems. To reduce the negative impact of communication, programmers overlap communication and computation by using asynchronous communication primitives. This increases code complexity, requiring more development effort and making less readable programs. This paper presents the hybrid use of MPI and SMPSs (SMP superscalar, a task-based shared-memory programming model) that allows the programmer to easily introduce the asynchrony necessary to overlap communication and computation. We demonstrate the hybrid use of MPI/SMPSs with the high-performance LINPACK benchmark (HPL), and compare it to the pure MPI implementation, which uses the look-ahead technique to overlap communication and computation. 
The hybrid MPI/SMPSs version significantly improves the performance of the pure MPI version, getting close to the asymptotic performance at medium problem sizes and still getting significant benefits at small/large problem sizes.", acknowledgement = ack-nhfb, fjournal = "ACM SIG{\-}PLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", keywords = "hybrid MPI/SMPSs; LINPACK; MPI; parallel programming model", } @Article{Molnar:2010:APM, author = "F. {Moln{\'a}r, Jr.} and T. Szak{\'a}ly and R. M{\'e}sz{\'a}ros and I. Lagzi", title = "Air pollution modelling using a {Graphics Processing Unit} with {CUDA}", journal = j-COMP-PHYS-COMM, volume = "181", number = "1", pages = "105--112", month = jan, year = "2010", CODEN = "CPHCBZ", DOI = "https://doi.org/10.1016/j.cpc.2009.09.008", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Sat Feb 11 09:54:27 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465509002872", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Muller:2010:SMA, author = "Matthias S. M{\"u}ller and Matthijs van Waveren and Ron Lieberman and Brian Whitney and Hideki Saito and Kalyan Kumaran and John Baron and William C. 
Brantley and Chris Parrott and Tom Elken and Huiyu Feng and Carl Ponder", title = "{SPEC MPI2007} --- an application benchmark suite for parallel systems using {MPI}", journal = j-CCPE, volume = "22", number = "2", pages = "191--205", month = feb, year = "2010", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.1535", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Mon Dec 5 10:08:41 MST 2011", bibsource = "http://www.interscience.wiley.com/jpages/1532-0626; https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Prac\-tice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "2 Dec 2009", } @Article{Nesterov:2010:SPT, author = "Oleksandr Nesterov", title = "A simple parallelization technique with {MPI} for ocean circulation models", journal = j-J-PAR-DIST-COMP, volume = "70", number = "1", pages = "35--44", month = jan, year = "2010", CODEN = "JPDCER", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Wed Sep 1 16:27:27 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sciencedirect.com/science/journal/07437315", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Article{Nunez:2010:NTS, author = "Alberto N{\'u}{\~n}ez and Javier Fern{\'a}ndez and Jose D. 
Garcia and F{\'e}lix Garcia and Jes{\'u}s Carretero", title = "New techniques for simulating high performance {MPI} applications on large storage networks", journal = j-J-SUPERCOMPUTING, volume = "51", number = "1", pages = "40--57", month = jan, year = "2010", CODEN = "JOSUED", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Wed Aug 25 08:38:45 MDT 2010", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=51&issue=1; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=51&issue=1&spage=40", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Article{Okitsu:2010:HPC, author = "Yusuke Okitsu and Fumihiko Ino and Kenichi Hagihara", title = "High-performance cone beam reconstruction using {CUDA} compatible {GPUs}", journal = j-PARALLEL-COMPUTING, volume = "36", number = "2--3", pages = "129--141", month = feb # "\slash " # mar, year = "2010", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Thu Sep 2 17:51:12 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Olivier:2010:COO, author = "Stephen L. Olivier and Jan F. 
Prins",
  title =        "Comparison of {OpenMP 3.0} and Other Task Parallel Frameworks on Unbalanced Task Graphs",
  journal =      j-INT-J-PARALLEL-PROG,
  volume =       "38",
  number =       "5--6",
  pages =        "341--360",
  month =        oct,
  year =         "2010",
  CODEN =        "IJPPE5",
  ISSN =         "0885-7458 (print), 1573-7640 (electronic)",
  ISSN-L =       "0885-7458",
  bibdate =      "Wed Sep 1 16:06:49 MDT 2010",
  bibsource =    "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=38&issue=5; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=38&issue=5&spage=341",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of Parallel Programming",
  journal-URL =  "http://link.springer.com/journal/10766",
}

@Article{Pan:2010:CPS,
  author =       "Heidi Pan and Benjamin Hindman and Krste Asanovi{\'c}",
  title =        "Composing parallel software efficiently with {Lithe}",
  journal =      j-SIGPLAN,
  volume =       "45",
  number =       "6",
  pages =        "376--387",
  month =        jun,
  year =         "2010",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/1809028.1806639",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Fri Oct 8 17:53:18 MDT 2010",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract =     "Applications composed of multiple parallel libraries perform poorly when those libraries interfere with one another by obliviously using the same physical cores, leading to destructive resource oversubscription. This paper presents the design and implementation of {\em Lithe}, a low-level substrate that provides the basic primitives and a standard interface for composing parallel codes efficiently. Lithe can be inserted underneath the runtimes of legacy parallel libraries to provide {\em bolt-on\/} composability without needing to change existing application code.
Lithe can also serve as the foundation for building new parallel abstractions and libraries that automatically interoperate with one another.\par In this paper, we show versions of Threading Building Blocks (TBB) and OpenMP perform competitively with their original implementations when ported to Lithe. Furthermore, for two applications composed of multiple parallel libraries, we show that leveraging our substrate outperforms their original, even expertly tuned, implementations.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  keywords =     "composability; cooperative scheduling; hierarchical scheduling; oversubscription; parallelism; resource management; user-level scheduling",
}

@Article{Pervez:2010:FMA,
  author =       "Salman Pervez and Ganesh Gopalakrishnan and Robert M. Kirby and Rajeev Thakur and William Gropp",
  title =        "Formal methods applied to high-performance computing software design: a case study of {MPI} one-sided communication-based locking",
  journal =      j-SPE,
  volume =       "40",
  number =       "1",
  pages =        "23--43",
  day =          "??",
  month =        jan,
  year =         "2010",
  CODEN =        "SPEXBL",
  DOI =          "https://doi.org/10.1002/spe.946",
  ISSN =         "0038-0644 (print), 1097-024X (electronic)",
  ISSN-L =       "0038-0644",
  bibdate =      "Wed Mar 17 10:16:21 MDT 2010",
  bibsource =    "http://www.interscience.wiley.com/jpages/0038-0644; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www3.interscience.wiley.com/journalfinder.html",
  acknowledgement = ack-nhfb,
  fjournal =     "Soft{\-}ware\emdash Prac{\-}tice and Experience",
  journal-URL =  "http://onlinelibrary.wiley.com/journal/10.1002/(ISSN)1097-024X",
  onlinedate =   "Dec 21 2009 3:42AM",
}

@Article{Preissl:2010:OCC,
  author =       "Robert Preissl and Alice Koniges and Stephan Ethier and Weixing Wang and Nathan Wichmann",
  title =        "Overlapping communication with computation using {OpenMP} tasks on the {GTS} magnetic fusion code",
  journal =      j-SCI-PROG,
  volume =       "18",
  number =       "3--4",
  pages =        "139--151",
  month =        "????",
  year =         "2010",
  CODEN =        "SCIPEV",
  DOI =          "https://doi.org/10.3233/SPR-2010-0311",
  ISSN =         "1058-9244 (print), 1875-919X (electronic)",
  ISSN-L =       "1058-9244",
  bibdate =      "Tue Dec 13 19:01:33 MST 2011",
  bibsource =    "http://www.iospress.nl/journal/scientific-programming/; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sciprogram.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Scientific Programming",
  journal-URL =  "http://iospress.metapress.com/content/1058-9244",
}

@Article{Preissl:2010:TMS,
  author =       "Robert Preissl and Martin Schulz and Dieter Kranzlm{\"u}ller and Bronis R. de Supinski and Daniel J. Quinlan",
  title =        "Transforming {MPI} source code based on communication patterns",
  journal =      j-FUT-GEN-COMP-SYS,
  volume =       "26",
  number =       "1",
  pages =        "147--154",
  month =        jan,
  year =         "2010",
  CODEN =        "FGSEVI",
  ISSN =         "0167-739X (print), 1872-7115 (electronic)",
  ISSN-L =       "0167-739X",
  bibdate =      "Sat Sep 11 13:08:16 MDT 2010",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sciencedirect.com/science/journal/0167739X",
  acknowledgement = ack-nhfb,
  fjournal =     "Future Generation Computer Systems",
  journal-URL =  "http://www.sciencedirect.com/science/journal/0167739X",
}

@Article{Sainio:2010:CGA,
  author =       "J.
Sainio",
  title =        "{CUDAEASY} --- a {GPU} accelerated cosmological lattice program",
  journal =      j-COMP-PHYS-COMM,
  volume =       "181",
  number =       "5",
  pages =        "906--912",
  month =        may,
  year =         "2010",
  CODEN =        "CPHCBZ",
  DOI =          "https://doi.org/10.1016/j.cpc.2010.01.002",
  ISSN =         "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L =       "0010-4655",
  bibdate =      "Sat Feb 11 09:54:29 MST 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0010465510000159",
  acknowledgement = ack-nhfb,
  fjournal =     "Computer Physics Communications",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00104655",
}

@Article{Saldana:2010:MPM,
  author =       "Manuel Salda{\~n}a and Arun Patel and Christopher Madill and Daniel Nunes and Danyao Wang and Paul Chow and Ralph Wittig and Henry Styles and Andrew Putnam",
  title =        "{MPI} as a Programming Model for High-Performance Reconfigurable Computers",
  journal =      j-TRETS,
  volume =       "3",
  number =       "4",
  pages =        "22:1--22:??",
  month =        nov,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1862648.1862652",
  ISSN =         "1936-7406 (print), 1936-7414 (electronic)",
  ISSN-L =       "1936-7406",
  bibdate =      "Tue Nov 23 11:26:33 MST 2010",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  articleno =    "22",
  fjournal =     "ACM Transactions on Reconfigurable Technology and Systems",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J1151",
}

@Book{Sanders:2010:CEI,
  author =       "Jason Sanders and Edward Kandrot",
  title =        "{CUDA} by Example: an Introduction to General-purpose {GPU} Programming",
  publisher =    pub-AW,
  address =      pub-AW:adr,
  pages =        "xix + 290",
  year =         "2010",
  ISBN =         "0-13-138768-5",
  ISBN-13 =      "978-0-13-138768-3",
  LCCN =         "QA76.76.A65",
  bibdate =      "Wed Jul 28 23:24:12 MDT 2010",
  bibsource =    "https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; https://www.math.utah.edu/pub/tex/bib/master.bib;
https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/scpe.bib; https://www.math.utah.edu/pub/tex/bib/unix.bib; z3950.gbv.de:20011/gvk",
  abstract =     "CUDA is a computing architecture designed to facilitate the development of parallel programs. This book shows programmers how to employ this new technology. Each area of CUDA development is introduced through working examples. After a concise introduction to the CUDA platform and architecture, as well as a quick-start guide to CUDA C, the book details the techniques and trade-offs associated with each key CUDA feature.",
  acknowledgement = ack-nhfb,
  keywords =     "CUDA; GPU",
  subject =      "application software; development; computer architecture; parallel programming (computer science)",
  tableofcontents = "Why CUDA? why now? \\ Getting started \\ Introduction to CUDA C \\ Parallel programming in CUDA C \\ Thread cooperation \\ Constant memory and events \\ Texture memory \\ Graphics interoperability \\ Atomics \\ Streams \\ CUDA C on multiple GPUs \\ The final countdown \\ Appendix A: Advanced atomics",
}

@Article{Sandes:2010:CUG,
  author =       "Edans Flavius O. Sandes and Alba Cristina M. A. de Melo",
  title =        "{CUDAlign}: using {GPU} to accelerate the comparison of megabase genomic sequences",
  journal =      j-SIGPLAN,
  volume =       "45",
  number =       "5",
  pages =        "137--146",
  month =        may,
  year =         "2010",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/1693453.1693473",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Tue Aug 31 22:39:18 MDT 2010",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "Biological sequence comparison is a very important operation in Bioinformatics. Even though there do exist exact methods to compare biological sequences, these methods are often neglected due to their quadratic time and space complexity.
In order to accelerate these methods, many GPU algorithms were proposed in the literature. Nevertheless, all of them restrict the size of the smallest sequence in such a way that Megabase genome comparison is prevented. In this paper, we propose and evaluate CUDAlign, a GPU algorithm that is able to compare Megabase biological sequences with an exact Smith--Waterman affine gap variant. CUDAlign was implemented in CUDA and tested in two GPU boards, separately. For real sequences whose size range from 1MBP (Megabase Pairs) to 47MBP, a close to uniform GCUPS (Giga Cells Updates per Second) was obtained, showing the potential scalability of our approach. Also, CUDAlign was able to compare the human chromosome 21 and the chimpanzee chromosome 22. This operation took 21 hours on GeForce GTX 280, resulting in a peak performance of 20.375 GCUPS. As far as we know, this is the first time such huge chromosomes are compared with an exact method.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  keywords =     "biological sequence comparison; GPU; Smith--Waterman",
}

@Article{Segovia:2010:PPN,
  author =       "Alejandro Segovia",
  title =        "Parallel programming with {NVIDIA CUDA}",
  journal =      j-LINUX-J,
  volume =       "2010",
  number =       "200",
  pages =        "2:1--2:??",
  month =        dec,
  year =         "2010",
  CODEN =        "LIJOFX",
  ISSN =         "1075-3583 (print), 1938-3827 (electronic)",
  ISSN-L =       "1075-3583",
  bibdate =      "Mon Jan 10 10:01:27 MST 2011",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/linux-journal.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "Linux Journal",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J508",
}

@Article{Shi:2010:PAE,
  author =       "Haixiang Shi and Bertil Schmidt and Weiguo Liu and Wolfgang M{\"u}ller-Wittig",
  title =        "A Parallel Algorithm for Error Correction in High-Throughput Short-Read Data on {CUDA}-Enabled Graphics Hardware",
  journal =
j-J-COMPUT-BIOL,
  volume =       "17",
  number =       "4",
  pages =        "603--615",
  month =        apr,
  year =         "2010",
  CODEN =        "JCOBEM",
  DOI =          "https://doi.org/10.1089/cmb.2009.0062",
  ISSN =         "1066-5277 (print), 1557-8666 (electronic)",
  ISSN-L =       "1066-5277",
  bibdate =      "Sat Jun 1 09:49:51 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jcomputbiol.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://www.liebertpub.com/doi/abs/10.1089/cmb.2009.0062; https://www.liebertpub.com/doi/pdf/10.1089/cmb.2009.0062",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Computational Biology",
  journal-URL =  "https://www.liebertpub.com/loi/cmb/",
  onlinedate =   "28 April 2010",
}

%%% Stone:2010:OPP: the ISSN field carries the print/electronic pair of
%%% Computing in Science and Engineering, agreeing with the ISSN-L,
%%% CODEN, and fjournal values of the same entry.
@Article{Stone:2010:OPP,
  author =       "John E. Stone and David Gohara and Guochun Shi",
  title =        "{OpenCL}: a Parallel Programming Standard for Heterogeneous Computing Systems",
  journal =      j-COMPUT-SCI-ENG,
  volume =       "12",
  number =       "3",
  pages =        "66--73",
  month =        may # "\slash " # jun,
  year =         "2010",
  CODEN =        "CSENFA",
  DOI =          "https://doi.org/10.1109/MCSE.2010.69",
  ISSN =         "1521-9615 (print), 1558-366X (electronic)",
  ISSN-L =       "1521-9615",
  bibdate =      "Thu May 13 11:08:14 2010",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/computscieng.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Computing in Science and Engineering",
  journal-URL =  "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5992",
}

@InProceedings{Suciu:2010:PIN,
  author =       "A. Suciu and I. Nagy and K. Marton and I.
Pinca",
  editor =       "Ioan Alfred Letia",
  booktitle =    "{Proceedings, 2010 IEEE 6th International Conference on Intelligent Computer Communication and Processing: Cluj-Napoca, Romania, August 26--28, 2010}",
  title =        "Parallel implementation of the {NIST Statistical Test Suite}",
  publisher =    pub-IEEE,
  address =      pub-IEEE:adr,
  bookpages =    "xiii + 487",
  pages =        "363--368",
  year =         "2010",
  DOI =          "https://doi.org/10.1109/ICCP.2010.5606412",
  ISBN =         "1-4244-8228-3 (print), 1-4244-8230-5 (electronic)",
  ISBN-13 =      "978-1-4244-8228-3 (print), 978-1-4244-8230-6 (electronic)",
  LCCN =         "QA76.76.E95",
  bibdate =      "Tue Jan 31 14:22:16 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/prng.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note =         "IEEE catalog number CFP1009D-ART.",
  URL =          "http://ieeexplore.ieee.org/servlet/opac?punumber=5598248",
  acknowledgement = ack-nhfb,
  keywords =     "OpenMP API",
  onlinedate =   "21 October 2010",
  remark =       "From the abstract: ``Experimental results show a very significant speedup of up to 103 times compared to the original version.''",
}

%%% Traff:2010:SCM: the author surname is spelled with its umlaut, as in
%%% entry Balaji:2011:MMC; the citation key keeps its ASCII form.
@Article{Traff:2010:SCM,
  author =       "Jesper Larsson Tr{\"a}ff and William D. Gropp and Rajeev Thakur",
  title =        "Self-Consistent {MPI} Performance Guidelines",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "21",
  number =       "5",
  pages =        "698--709",
  month =        may,
  year =         "2010",
  CODEN =        "ITDSEO",
  DOI =          "https://doi.org/10.1109/TPDS.2009.120",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Thu May 13 12:06:56 2010",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed Systems",
  journal-URL =  "http://www.computer.org/tpds/archives.htm",
}

@Article{Tzannes:2010:LBS,
  author =       "Alexandros Tzannes and George C.
Caragea and Rajeev Barua and Uzi Vishkin",
  title =        "Lazy binary-splitting: a run-time adaptive work-stealing scheduler",
  journal =      j-SIGPLAN,
  volume =       "45",
  number =       "5",
  pages =        "179--190",
  month =        may,
  year =         "2010",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/1693453.1693479",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Tue Aug 31 22:39:18 MDT 2010",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract =     "We present Lazy Binary Splitting (LBS), a user-level scheduler of nested parallelism for shared-memory multiprocessors that builds on existing Eager Binary Splitting work-stealing (EBS) implemented in Intel's Threading Building Blocks (TBB), but improves performance and ease-of-programming. In its simplest form (SP), EBS requires manual tuning by repeatedly running the application under carefully controlled conditions to determine a {\em stop-splitting-threshold (sst)\/} for every do-all loop in the code. This threshold limits the parallelism and prevents excessive overheads for fine-grain parallelism. Besides being tedious, this tuning also over-fits the code to some particular dataset, platform and calling context of the do-all loop, resulting in poor performance portability for the code. LBS overcomes both the performance portability and ease-of-programming pitfalls of a manually fixed threshold by adapting dynamically to run-time conditions without requiring tuning.\par We compare LBS to Auto-Partitioner (AP), the latest default scheduler of TBB, which does not require manual tuning either but lacks context portability, and outperform it by 38.9\% using TBB's default AP configuration, and by 16.2\% after we tuned AP to our experimental platform. We also compare LBS to SP by manually finding SP's sst using a training dataset and then running both on a different execution dataset. LBS outperforms SP by 19.5\% on average,
while allowing for improved performance portability without requiring tedious manual tuning. LBS also outperforms SP with {\em sst=1}, its default value when undefined, by 56.7\%, and serializing work-stealing (SWS), another work-stealer by 54.7\%. Finally, compared to serializing inner parallelism (SI) which has been used by OpenMP, LBS is 54.2\% faster.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  keywords =     "dynamic scheduling; load balancing; nested parallelism; thread scheduling; work stealing",
}

%%% Tzannes:2010:LBS (above): fjournal is spelled "ACM SIGPLAN Notices",
%%% the same spelling used by the other j-SIGPLAN entries nearby.
@Article{Wendykier:2010:PCH,
  author =       "Piotr Wendykier and James G. Nagy",
  title =        "{Parallel Colt}: a High-Performance {Java} Library for Scientific Computing and Image Processing",
  journal =      j-TOMS,
  volume =       "37",
  number =       "3",
  pages =        "31:1--31:22",
  month =        sep,
  year =         "2010",
  CODEN =        "ACMSCU",
  DOI =          "https://doi.org/10.1145/1824801.1824809",
  ISSN =         "0098-3500 (print), 1557-7295 (electronic)",
  ISSN-L =       "0098-3500",
  bibdate =      "Mon Sep 27 10:15:50 MDT 2010",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/super.bib; https://www.math.utah.edu/pub/tex/bib/toms.bib",
  abstract =     "Major breakthroughs in chip and software design have been observed for the last nine years. In October 2001, IBM released the world's first multicore processor: POWER4. Six years later, in February 2007, NVIDIA made a public release of CUDA SDK, a set of development tools to write algorithms for execution on Graphic Processing Units (GPUs).
Although software vendors have started working on parallelizing their products, the vast majority of existing code is still sequential and does not effectively utilize modern multicore CPUs and manycore GPUs.\par This article describes Parallel Colt, a multithreaded Java library for scientific computing and image processing. In addition to describing the design and functionality of Parallel Colt, a comparison to MATLAB is presented. Two ImageJ plugins for iterative image deblurring and motion correction of PET brain images are described as typical applications of this library. Performance comparisons with MATLAB, including GPU computations via AccelerEyes' Jacket toolbox are also given.",
  acknowledgement = ack-nhfb,
  articleno =    "31",
  fjournal =     "ACM Transactions on Mathematical Software (TOMS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J782",
  keywords =     "Deconvolution; FFT; inverse problems; iterative methods; motion correction; multithreading; PET; regularization",
}

@InProceedings{Zhao:2010:GMP,
  author =       "Kaiyong Zhao and Xiaowen Chu",
  editor =       "{IEEE}",
  booktitle =    "{IEEE 10th International Conference on Computer and Information Technology (CIT), 2010: June 29, 2010--July 1, 2010, Bradford, West Yorkshire, UK}",
  title =        "{GPUMP}: a Multiple-Precision Integer Library for {GPUs}",
  publisher =    pub-IEEE,
  address =      pub-IEEE:adr,
  bookpages =    "xcix + 2987 (est.)",
  pages =        "1164--1168",
  year =         "2010",
  DOI =          "https://doi.org/10.1109/CIT.2010.211",
  ISBN =         "0-7695-4108-9 (print), 1-4244-7547-3",
  ISBN-13 =      "978-0-7695-4108-2 (print), 978-1-4244-7547-6",
  LCCN =         "????",
  bibdate =      "Thu Jan 16 10:33:01 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/fparith.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note =         "IEEE Computer Society Order Number E4108.
BMS Part Number: CFP10355-CDR",
  acknowledgement = ack-nhfb,
  book-URL =     "http://ieeexplore.ieee.org/servlet/opac?punumber=5575291",
  keywords =     "CUDA; GPU; multiple-precision algorithm; multiple-precision comparison; multiple-precision division; multiple-precision exponentiation; multiple-precision modular addition; multiple-precision modular multiplication; multiple-precision Montgomery exponentiation; multiple-precision Montgomery multiplication; multiple-precision Montgomery reduction; multiple-precision multiplication; nVidia GT200 GPU",
}

@Article{Agrawal:2011:PPS,
  author =       "Ankit Agrawal and Sanchit Misra and Daniel Honbo and Alok Choudhary",
  title =        "Parallel pairwise statistical significance estimation of local sequence alignment using {Message Passing Interface} library",
  journal =      j-CCPE,
  volume =       "23",
  number =       "17",
  pages =        "2269--2279",
  day =          "10",
  month =        dec,
  year =         "2011",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.1798",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Mon Dec 5 10:09:00 MST 2011",
  bibsource =    "http://www.interscience.wiley.com/jpages/1532-0626; https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
  onlinedate =   "7 Jul 2011",
}

@Article{Agullo:2011:QOM,
  author =       "Emmanuel Agullo and Camille Coti and Thomas Herault and Julien Langou and Sylvain Peyronnet and Ala Rezmerita and Franck Cappello and Jack Dongarra",
  title =        "{QCG-OMPI}: {MPI} applications on grids",
  journal =      j-FUT-GEN-COMP-SYS,
  volume =       "27",
  number =       "4",
  pages =        "357--369",
  month =        apr,
  year =         "2011",
  CODEN =        "FGSEVI",
  ISSN =         "0167-739X (print), 1872-7115 (electronic)",
  ISSN-L =       "0167-739X",
  bibdate =      "Tue Aug 30 11:43:29 MDT 2011",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.sciencedirect.com/science/journal/0167739X",
  acknowledgement = ack-nhfb,
  fjournal =     "Future Generation Computer Systems",
  journal-URL =  "http://www.sciencedirect.com/science/journal/0167739X",
}

@Article{Alonso:2011:NEM,
  author =       "P. Alonso and R. Cortina and F. J. Mart{\'\i}nez-Zald{\'\i}var and J. Ranilla",
  title =        "{Neville} elimination on multi- and many-core systems: {OpenMP}, {MPI} and {CUDA}",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "58",
  number =       "2",
  pages =        "215--225",
  month =        nov,
  year =         "2011",
  CODEN =        "JOSUED",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Tue Dec 13 15:25:06 MST 2011",
  bibsource =    "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=58&issue=2; https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=58&issue=2&spage=215",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
}

@Article{Balaji:2011:MMC,
  author =       "Pavan Balaji and Darius Buntinas and David Goodell and William Gropp and Torsten Hoefler and Sameer Kumar and Ewing Lusk and Rajeev Thakur and Jesper Larsson Tr{\"a}ff",
  title =        "{MPI} on Millions of Cores",
  journal =      j-PARALLEL-PROCESS-LETT,
  volume =       "21",
  number =       "1",
  pages =        "45--60",
  month =        mar,
  year =         "2011",
  CODEN =        "PPLTEE",
  DOI =          "https://doi.org/10.1142/S0129626411000060",
  ISSN =         "0129-6264 (print), 1793-642X (electronic)",
  bibdate =      "Tue Feb 28 11:32:06 MST 2012",
  bibsource =    "http://ejournals.wspc.com.sg/ppl/; https://www.math.utah.edu/pub/tex/bib/parallelprocesslett.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Processing Letters",
  journal-URL =  "http://www.worldscientific.com/loi/ppl",
}

@Article{Balevic:2011:KAD,
  author =       "Ana Balevic and Bart Kienhuis",
  title =        "{KPN2GPU}: an approach
for discovery and exploitation of fine-grain data parallelism in process networks",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "39",
  number =       "4",
  pages =        "66--71",
  month =        sep,
  year =         "2011",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2082156.2082173",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Tue Dec 20 17:53:58 MST 2011",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "With advances in manycore and accelerator architectures, the high performance and embedded spaces are rapidly converging. Emerging architectures feature different forms of parallelism. The Polyhedral Processes Networks (PPNs) are a proven model of choice for automated generation of pipeline and task parallel programs from sequential source code, however data parallelism is not addressed. In this paper, we present a systematic approach for identification and extraction of fine grain data parallelism from the PPN specification.
The approach is implemented in a tool, called kpn2gpu, which produces fine-grain data parallel CUDA kernels for graphics processing units (GPUs).",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J89",
}

@Article{Bhattacharjee:2011:PLC,
  author =       "Abhishek Bhattacharjee and Gilberto Contreras and Margaret Martonosi",
  title =        "Parallelization libraries: Characterizing and reducing overheads",
  journal =      j-TACO,
  volume =       "8",
  number =       "1",
  pages =        "5:1--5:??",
  month =        apr,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1952998.1953003",
  ISSN =         "1544-3566 (print), 1544-3973 (electronic)",
  ISSN-L =       "1544-3566",
  bibdate =      "Wed Apr 27 07:54:03 MDT 2011",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract =     "Creating efficient, scalable dynamic parallel runtime systems for chip multiprocessors (CMPs) requires understanding the overheads that manifest at high core counts and small task sizes. In this article, we assess these overheads on Intel's Threading Building Blocks (TBB) and OpenMP. First, we use real hardware and simulations to detail various scheduler and synchronization overheads. We find that these can amount to 47\% of TBB benchmark runtime and 80\% of OpenMP benchmark runtime. Second, we propose load balancing techniques such as occupancy-based and criticality-guided task stealing, to boost performance. Overall, our study provides valuable insights for creating robust, scalable runtime libraries.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "ACM Transactions on Architecture and Code Optimization (TACO)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J924",
}

@Article{Camp:2011:SIU,
  author =       "David Camp and Christoph Garth and Hank Childs and Dave Pugmire and Kenneth I.
Joy",
  title =        "Streamline Integration Using {MPI}-Hybrid Parallelism on a Large Multicore Architecture",
  journal =      j-IEEE-TRANS-VIS-COMPUT-GRAPH,
  volume =       "17",
  number =       "11",
  pages =        "1702--1713",
  month =        nov,
  year =         "2011",
  CODEN =        "ITVGEA",
  DOI =          "https://doi.org/10.1109/TVCG.2010.259",
  ISSN =         "1077-2626 (print), 1941-0506 (electronic), 2160-9306",
  ISSN-L =       "1077-2626",
  bibdate =      "Thu Sep 29 11:52:46 2011",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/ieeetransviscomputgraph.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Visualization and Computer Graphics",
  journal-URL =  "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=2945",
}

@Article{Cao:2011:OMM,
  author =       "Chao Cao and Yun-wen Chen and Yuning Wu and Erik Deumens and Hai-Ping Cheng",
  title =        "{OPAL}: a multiscale multicenter simulation package based on {MPI-2} protocol",
  journal =      j-IJQC,
  volume =       "111",
  number =       "15",
  pages =        "4020--4029",
  month =        dec,
  year =         "2011",
  CODEN =        "IJQCB2",
  DOI =          "https://doi.org/10.1002/qua.22916",
  ISSN =         "0020-7608 (print), 1097-461X (electronic)",
  ISSN-L =       "0020-7608",
  bibdate =      "Sat Oct 1 15:40:12 MDT 2011",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/ijqc2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Int. J.
Quantum Chem.",
  fjournal =     "International Journal of Quantum Chemistry",
  journal-URL =  "http://www.interscience.wiley.com/jpages/0020-7608/",
  onlinedate =   "23 Nov 2010",
}

@Article{Catanzaro:2011:CCE,
  author =       "Bryan Catanzaro and Michael Garland and Kurt Keutzer",
  title =        "{Copperhead}: compiling an embedded data parallel language",
  journal =      j-SIGPLAN,
  volume =       "46",
  number =       "8",
  pages =        "47--56",
  month =        aug,
  year =         "2011",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2038037.1941562",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Mon Aug 26 14:04:45 MDT 2013",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/python.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  note =         "PPoPP '11 Conference proceedings.",
  abstract =     "Modern parallel microprocessors deliver high performance on applications that expose substantial fine-grained data parallelism. Although data parallelism is widely available in many computations, implementing data parallel algorithms in low-level languages is often an unnecessarily difficult task. The characteristics of parallel microprocessors and the limitations of current programming methodologies motivate our design of Copperhead, a high-level data parallel language embedded in Python. The Copperhead programmer describes parallel computations via composition of familiar data parallel primitives supporting both flat and nested data parallel computation on arrays of data. Copperhead programs are expressed in a subset of the widely used Python programming language and interoperate with standard Python modules, including libraries for numeric computation, data visualization, and analysis. In this paper, we discuss the language, compiler, and runtime features that enable Copperhead to efficiently execute data parallel code.
We define the restricted subset of Python which Copperhead supports and introduce the program analysis techniques necessary for compiling Copperhead code into efficient low-level implementations. We also outline the runtime support by which Copperhead programs interoperate with standard Python modules. We demonstrate the effectiveness of our techniques with several examples targeting the CUDA platform for parallel programming on GPUs. Copperhead code is concise, on average requiring 3.6 times fewer lines of code than CUDA, and the compiler generates efficient code, yielding 45--100\% of the performance of hand-crafted, well optimized CUDA code.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
}

%%% Catanzaro:2011:CCE (above): the percentage range in the abstract is
%%% typeset with an en dash (45--100).
@Article{Chalkidis:2011:HPH,
  author =       "Georgios Chalkidis and Masao Nagasaki and Satoru Miyano",
  title =        "High Performance Hybrid Functional {Petri} Net Simulations of Biological Pathway Models on {CUDA}",
  journal =      j-TCBB,
  volume =       "8",
  number =       "6",
  pages =        "1545--1556",
  month =        nov,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2010.118",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sun Nov 6 06:45:50 MST 2011",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Czapinski:2011:TST,
  author =       "Michal Czapi{\'n}ski and Stuart Barnes",
  title =        "{Tabu Search} with two approaches to parallel flowshop evaluation on {CUDA} platform",
  journal =      j-J-PAR-DIST-COMP,
  volume =       "71",
  number =       "6",
  pages =        "802--811",
  month =        jun,
  year =         "2011",
  CODEN =        "JPDCER",
  DOI =          "https://doi.org/10.1016/j.jpdc.2011.02.006",
  ISSN =         "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L =       "0743-7315",
  bibdate =      "Sat Feb 25 09:11:32
MST 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sciencedirect.com/science/journal/07437315",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0743731511000384",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Parallel and Distributed Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/07437315",
}

@Article{delaAsuncion:2011:SOL,
  author =       "Marc de la Asunci{\'o}n and Jos{\'e} M. Mantas and Manuel J. Castro",
  title =        "Simulation of one-layer shallow water systems on multicore and {CUDA} architectures",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "58",
  number =       "2",
  pages =        "206--214",
  month =        nov,
  year =         "2011",
  CODEN =        "JOSUED",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Tue Dec 13 15:25:06 MST 2011",
  bibsource =    "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=58&issue=2; https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=58&issue=2&spage=206",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
}

@Article{Dohi:2011:GIO,
  author =       "Keisuke Dohi and Yuichiro Shibata and Kiyoshi Oguri and Takafumi Fujimoto",
  title =        "{GPU} implementation and optimization of electromagnetic simulation using the {FDTD} method for antenna designing",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "39",
  number =       "4",
  pages =        "26--31",
  month =        sep,
  year =         "2011",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2082156.2082163",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Tue Dec 20 17:53:58 MST 2011",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "This paper describes electromagnetical field
simulation using the 3D-FDTD method for antenna designing on a CUDA-compatible GPU. We use the Split Perfectly Matched Layer as an absorbing boundary condition. As is well known, the 3D-FDTD method is a kind of stencil computation and is considered better at GPU implementation. In order to find the best blocking size for the target GPU architecture, we empirically explore a design space of blocking size. We also propose a kernel fusing method as one of the efficient optimization methods, which improves the total performance about 10\% at the cost of a small increase in memory usage.", acknowledgement = ack-nhfb, fjournal = "ACM SIGARCH Computer Architecture News", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J89", } @Article{Dotsenko:2011:ATF, author = "Yuri Dotsenko and Sara S. Baghsorkhi and Brandon Lloyd and Naga K. Govindaraju", title = "Auto-tuning of {Fast Fourier Transform} on graphics processors", journal = j-SIGPLAN, volume = "46", number = "8", pages = "257--266", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941589", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", abstract = "We present an auto-tuning framework for FFTs on graphics processors (GPUs). Due to complex design of the memory and compute subsystems on GPUs, the performance of FFT kernels over the range of possible input parameters can vary widely. We generate several variants for each component of the FFT kernel that, for different cases, are likely to perform well. Our auto-tuner composes variants to generate kernels and selects the best ones. We present heuristics to prune the search space and profile only a small fraction of all possible kernels. 
We compose optimized kernels to improve the performance of larger FFT computations. We implement the system using the NVIDIA CUDA API and compare its performance to the state-of-the-art FFT libraries. On a range of NVIDIA GPUs and input sizes, our auto-tuned FFTs outperform the NVIDIA CUFFT 3.0 library by up to 38x and deliver up to 3x higher performance compared to a manually-tuned FFT.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", } @Article{Ewedafe:2011:PID, author = "Simon Uzezi Ewedafe and Rio Hirowati Shariffudin", title = "Parallel Implementation of {$2$-D} Telegraphic Equation on {MPI\slash PVM} Cluster", journal = j-INT-J-PARALLEL-PROG, volume = "39", number = "2", pages = "202--231", month = apr, year = "2011", CODEN = "IJPPE5", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Tue Sep 6 21:08:27 MDT 2011", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=39&issue=2; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=39&issue=2&spage=202", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", } @Article{Filgueira:2011:ACE, author = "Rosa Filgueira and David E. 
Singh and Jes{\'u}s Carretero and Alejandro Calder{\'o}n and F{\'e}lix Garc{\'\i}a", title = "{Adaptive-CoMPI}: Enhancing {MPI}-Based Applications' Performance and Scalability by using Adaptive Compression", journal = j-IJHPCA, volume = "25", number = "1", pages = "93--114", month = feb, year = "2011", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342010373486", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Tue Sep 6 15:14:36 MDT 2011", bibsource = "http://hpc.sagepub.com/content/25/1.toc; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://hpc.sagepub.com/content/25/1/93.full.pdf+html", acknowledgement = ack-nhfb, fjournal = "International Journal of High Performance Computing Applications", journal-URL = "http://hpc.sagepub.com/content/by/year", onlinedate = "July 26, 2010", } @Article{Fousek:2011:AFC, author = "Jan Fousek and Ji{\v{r}}{\'\i} Filipovi{\v{c}} and Mat{\'u}{\v{s}} Madzin", title = "Automatic fusions of {CUDA--GPU} kernels for parallel map", journal = j-COMP-ARCH-NEWS, volume = "39", number = "4", pages = "98--99", month = sep, year = "2011", CODEN = "CANED2", DOI = "https://doi.org/10.1145/2082156.2082183", ISSN = "0163-5964 (print), 1943-5851 (electronic)", ISSN-L = "0163-5964", bibdate = "Tue Dec 20 17:53:58 MST 2011", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigarch.bib", abstract = "When implementing a function mapping on the contemporary GPU, several contradictory performance factors affecting distribution of computation into GPU kernels have to be balanced. A decomposition-fusion scheme suggests to decompose the computational problem to be solved by several simple functions implemented as standalone kernels and to fuse some of these functions later into more complex kernels to improve memory locality.
In this paper, a prototype of source-to-source compiler automating the fusion phase is presented and the impact of fusions generated by the compiler as well as compiler efficiency is experimentally evaluated.", acknowledgement = ack-nhfb, fjournal = "ACM SIGARCH Computer Architecture News", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J89", } @Article{Garcia:2011:KRR, author = "Saturnino Garcia and Donghwan Jeon and Christopher M. Louie and Michael Bedford Taylor", title = "{Kremlin}: rethinking and rebooting {{\tt gprof}} for the multicore age", journal = j-SIGPLAN, volume = "46", number = "6", pages = "458--469", month = jun, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/1993316.1993553", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 10:23:33 MDT 2011", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "Many recent parallelization tools lower the barrier for parallelizing a program, but overlook one of the first questions that a programmer needs to answer: which parts of the program should I spend time parallelizing?\par This paper examines Kremlin, an automatic tool that, given a serial version of a program, will make recommendations to the user as to what regions (e.g. loops or functions) of the program to attack first. Kremlin introduces a novel hierarchical critical path analysis and develops a new metric for estimating the potential of parallelizing a region: self-parallelism. We further introduce the concept of a parallelism planner, which provides a ranked order of specific regions to the programmer that are likely to have the largest performance impact when parallelized. 
Kremlin supports multiple planner personalities, which allow the planner to more effectively target a particular programming environment or class of machine.\par We demonstrate the effectiveness of one such personality, an OpenMP planner, by comparing versions of programs that are parallelized according to Kremlin's plan against third-party manually parallelized versions. The results show that Kremlin's OpenMP planner is highly effective, producing plans whose performance is typically comparable to, and sometimes much better than, manual parallelization. At the same time, these plans would require that the user parallelize significantly fewer regions of the program.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", } @Article{Gopalakrishnan:2011:FAM, author = "Ganesh Gopalakrishnan and Robert M. Kirby and Stephen Siegel and Rajeev Thakur and William Gropp and Ewing Lusk and Bronis R. de Supinski and Martin Schulz and Greg Bronevetsky", title = "Formal analysis of {MPI}-based parallel programs", journal = j-CACM, volume = "54", number = "12", pages = "82--91", month = dec, year = "2011", CODEN = "CACMA2", DOI = "https://doi.org/10.1145/2043174.2043194", ISSN = "0001-0782 (print), 1557-7317 (electronic)", ISSN-L = "0001-0782", bibdate = "Tue Nov 29 11:53:53 MST 2011", bibsource = "http://www.acm.org/pubs/contents/journals/cacm/; https://www.math.utah.edu/pub/tex/bib/cacm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Communications of the ACM", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J79", } @Book{Hager:2011:IHP, author = "Georg Hager and Gerhard Wellein", title = "Introduction to high performance computing for scientists and engineers", volume = "7", publisher = pub-CRC, address = pub-CRC:adr, pages = "xxv + 330 + 4", year = "2011", ISBN = "1-4398-1192-X", ISBN-13 = "978-1-4398-1192-4", LCCN = "QA76.88 .H34 2011", bibdate =
"Wed Sep 15 13:26:35 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; z3950.loc.gov:7090/Voyager", series = "Chapman and Hall/CRC computational science series", acknowledgement = ack-nhfb, subject = "high performance computing", tableofcontents = "Modern Processors \\ Stored-program computer architecture \\ General-purpose cache-based microprocessor architecture \\ Memory hierarchies \\ Multicore processors \\ Multithreaded processors \\ Vector processors \\ \\ Basic Optimization Techniques for Serial Code \\ Scalar profiling \\ Common sense optimizations \\ Simple measures, large impact \\ The role of compilers \\ C++ optimizations \\ \\ Data Access Optimization \\ Balance analysis and lightspeed estimates \\ Storage order \\ Case study: The Jacobi algorithm \\ Case study: Dense matrix transpose \\ Algorithm classification and access optimizations \\ Case study: Sparse matrix-vector multiply \\ \\ Parallel Computers \\ Taxonomy of parallel computing paradigms \\ Shared-memory computers \\ Distributed-memory computers \\ Hierarchical (hybrid) systems \\ Networks \\ \\ Basics of Parallelization \\ Why parallelize? 
\\ Parallelism \\ Parallel scalability \\ \\ Shared-Memory Parallel Programming with OpenMP \\ Short introduction to OpenMP \\ Case study: OpenMP-parallel Jacobi algorithm \\ Advanced OpenMP: Wavefront parallelization \\ \\ Efficient OpenMP Programming \\ Profiling OpenMP programs \\ Performance pitfalls \\ Case study: Parallel sparse matrix-vector multiply \\ \\ Locality Optimizations on ccNUMA Architectures \\ Locality of access on ccNUMA \\ Case study: ccNUMA optimization of sparse MVM \\ Placement pitfalls \\ ccNUMA issues with C++ \\ \\ Distributed-Memory Parallel Programming with MPI \\ Message passing \\ A short introduction to MPI \\ Example: MPI parallelization of a Jacobi solver \\ \\ Efficient MPI Programming \\ MPI performance tools \\ Communication parameters \\ Synchronization, serialization, contention \\ Reducing communication overhead \\ Understanding intranode point-to-point communication \\ \\ Hybrid Parallelization with MPI and OpenMP \\ Basic MPI/OpenMP programming models \\ MPI taxonomy of thread interoperability \\ Hybrid decomposition and mapping \\ Potential benefits and drawbacks of hybrid programming \\ \\ Appendix A: Topology and Affinity in Multicore Environments \\ Appendix B: Solutions to the Problems \\ \\ Bibliography \\ \\ Index", } @Article{Han:2011:HHL, author = "Tianyi David Han and Tarek S.
Abdelrahman", title = "{hiCUDA}: High-Level {GPGPU} Programming", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "22", number = "1", pages = "78--90", month = jan, year = "2011", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/TPDS.2010.62", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Fri Feb 25 14:08:57 2011", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "IEEE Transactions on Parallel and Distributed Systems", journal-URL = "http://www.computer.org/tpds/archives.htm", } @Article{Harvey:2011:STP, author = "M. J. Harvey and G. {De Fabritiis}", title = "{Swan}: a tool for porting {CUDA} programs to {OpenCL}", journal = j-COMP-PHYS-COMM, volume = "182", number = "4", pages = "1093--1099", month = apr, year = "2011", CODEN = "CPHCBZ", DOI = "https://doi.org/10.1016/j.cpc.2010.12.052", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Sat Feb 11 10:10:57 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465511000117", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Hawick:2011:HSL, author = "K. A. Hawick and D. P. 
Playne", title = "Hypercubic storage layout and transforms in arbitrary dimensions using {GPUs} and {CUDA}", journal = j-CCPE, volume = "23", number = "10", pages = "1027--1050", month = jul, year = "2011", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.1628", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Mon Dec 5 10:08:56 MST 2011", bibsource = "http://www.interscience.wiley.com/jpages/1532-0626; https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "28 Aug 2010", } @Article{Hawick:2011:RLS, author = "K. A. Hawick and A. Leist and D. P. Playne", title = "Regular Lattice and Small-World Spin Model Simulations Using {CUDA} and {GPUs}", journal = j-INT-J-PARALLEL-PROG, volume = "39", number = "2", pages = "183--201", month = apr, year = "2011", CODEN = "IJPPE5", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Tue Sep 6 21:08:27 MDT 2011", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=39&issue=2; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=39&issue=2&spage=183", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", } @Article{Hinde:2011:QMD, author = "Robert J. 
Hinde", title = "{QSATS}: {MPI}-driven quantum simulations of atomic solids at zero temperature", journal = j-COMP-PHYS-COMM, volume = "182", number = "11", pages = "2339--2349", month = nov, year = "2011", CODEN = "CPHCBZ", DOI = "https://doi.org/10.1016/j.cpc.2011.04.024", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Sat Feb 11 10:11:00 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465511001615", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Hoefler:2011:SPT, author = "Torsten Hoefler and Rolf Rabenseifner and Hubert Ritzdorf and Bronis R. de Supinski and Rajeev Thakur and Jesper Larsson Tr{\"a}ff", title = "The scalable process topology interface of {MPI 2.2}", journal = j-CCPE, volume = "23", number = "4", pages = "293--310", day = "25", month = mar, year = "2011", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.1643", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Mon Dec 5 10:08:53 MST 2011", bibsource = "http://www.interscience.wiley.com/jpages/1532-0626; https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "28 Aug 2010", } @Article{Hong:2011:ACG, author = "Sungpack Hong and Sang Kyun Kim and Tayo Oguntebi and Kunle Olukotun", title = "Accelerating {CUDA} graph algorithms at maximum warp", journal = j-SIGPLAN, volume = "46", number = "8", pages = "267--276", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941590", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 
(electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", abstract = "Graphs are powerful data representations favored in many computational domains. Modern GPUs have recently shown promising results in accelerating computationally challenging graph problems but their performance suffered heavily when the graph structure is highly irregular, as most real-world graphs tend to be. In this study, we first observe that the poor performance is caused by work imbalance and is an artifact of a discrepancy between the GPU programming model and the underlying GPU architecture. We then propose a novel virtual warp-centric programming method that exposes the traits of underlying GPU architectures to users. Our method significantly improves the performance of applications with heavily imbalanced workloads, and enables trade-offs between workload imbalance and ALU underutilization for fine-tuning the performance. Our evaluation reveals that our method exhibits up to 9x speedup over previous GPU algorithms and 12x over single thread CPU execution on irregular graphs. When properly configured, it also yields up to 30\% improvement over previous GPU algorithms on regular graphs. In addition to performance gains on graph algorithms, our programming method achieves 1.3x to 15.1x speedup on a set of GPU benchmark applications. Our study also confirms that the performance gap between GPUs and other multi-threaded CPU graph implementations is primarily due to the large difference in memory bandwidth.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", } @InProceedings{Houzeaux:2011:HMO, author = "G. Houzeaux and M. V{\'a}zquez and X. S{\'a}ez and J. M.
Cela", title = "Hybrid {MPI--OpenMP} performance in massively parallel computational fluid dynamics", crossref = "Tromeur-Dervout:2011:PCF", volume = "74", pages = "293--297", year = "2011", DOI = "https://doi.org/10.1007/978-3-642-14438-7_31", bibdate = "Sat Dec 22 08:34:49 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncse.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/content/pdf/10.1007/978-3-642-14438-7_31", acknowledgement = ack-nhfb, book-DOI = "https://doi.org/10.1007/978-3-642-14438-7", book-URL = "http://www.springerlink.com/content/978-3-642-14438-7", } @Article{Hussain:2011:PIA, author = "Masroor Hussain and Muhammad Abid and Mushtaq Ahmad and Ashfaq Khokhar and Arif Masud", title = "A Parallel Implementation of {ALE} Moving Mesh Technique for {FSI} Problems using {OpenMP}", journal = j-INT-J-PARALLEL-PROG, volume = "39", number = "6", pages = "717--745", month = dec, year = "2011", CODEN = "IJPPE5", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Tue Sep 6 21:08:54 MDT 2011", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=39&issue=6; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=39&issue=6&spage=717", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", } @Article{Jin:2011:HPC, author = "Haoqiang Jin and Dennis Jespersen and Piyush Mehrotra and Rupak Biswas and Lei Huang and Barbara Chapman", title = "High performance computing using {MPI} and {OpenMP} on multi-core parallel systems", journal = j-PARALLEL-COMPUTING, volume = "37", number = "9", pages = "562--575", month = sep, year = "2011", CODEN = "PACOEJ", DOI = "https://doi.org/10.1016/j.parco.2011.02.002", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = 
"Sat Feb 4 15:17:35 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819111000159", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Kalentev:2011:CCL, author = "Oleksandr Kalentev and Abha Rai and Stefan Kemnitz and Ralf Schneider", title = "Connected component labeling on a {$2$D} grid using {CUDA}", journal = j-J-PAR-DIST-COMP, volume = "71", number = "4", pages = "615--620", month = apr, year = "2011", CODEN = "JPDCER", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Fri Feb 25 19:11:50 MST 2011", bibsource = "https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sciencedirect.com/science/journal/07437315", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Article{Kim:2011:ASC, author = "Jungwon Kim and Honggyu Kim and Joo Hwan Lee and Jaejin Lee", title = "Achieving a single compute device image in {OpenCL} for multiple {GPUs}", journal = j-SIGPLAN, volume = "46", number = "8", pages = "277--288", month = aug, year = "2011", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2038037.1941591", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 14:04:45 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '11 Conference proceedings.", abstract = "In this paper, we propose an OpenCL framework that combines multiple GPUs and treats them as a single compute device. 
Providing a single virtual compute device image to the user makes an OpenCL application written for a single GPU portable to the platform that has multiple GPU devices. It also makes the application exploit full computing power of the multiple GPU devices and the total amount of GPU memories available in the platform. Our OpenCL framework automatically distributes at run-time the OpenCL kernel written for a single GPU into multiple CUDA kernels that execute on the multiple GPU devices. It applies a run-time memory access range analysis to the kernel by performing a sampling run and identifies an optimal workload distribution for the kernel. To achieve a single compute device image, the runtime maintains virtual device memory that is allocated in the main memory. The OpenCL runtime treats the memory as if it were the memory of a single GPU device and keeps it consistent to the memories of the multiple GPU devices. Our OpenCL-C-to-C translator generates the sampling code from the OpenCL kernel code and OpenCL-C-to-CUDA-C translator generates the CUDA kernel code for the distributed OpenCL kernel. We show the effectiveness of our OpenCL framework by implementing the OpenCL runtime and two source-to-source translators. We evaluate its performance with a system that contains 8 GPUs using 11 OpenCL benchmark applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", } @Article{Kolonias:2011:DIE, author = "Vasileios Kolonias and Artemios G. 
Voyiatzis and George Goulas and Efthymios Housos", title = "Design and implementation of an efficient integer count sort in {CUDA GPUs}", journal = j-CCPE, volume = "23", number = "18", pages = "2365--2381", day = "25", month = dec, year = "2011", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.1776", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Mon Dec 5 10:09:01 MST 2011", bibsource = "http://www.interscience.wiley.com/jpages/1532-0626; https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "22 Jun 2011", } @Article{Li:2011:FSM, author = "Guodong Li and Robert Palmer and Michael DeLisi and Ganesh Gopalakrishnan and Robert M. Kirby", title = "Formal specification of {MPI 2.0}: {Case} study in specifying a practical concurrent programming {API}", journal = j-SCI-COMPUT-PROGRAM, volume = "76", number = "2", pages = "65--81", day = "1", month = feb, year = "2011", CODEN = "SCPGD4", ISSN = "0167-6423 (print), 1872-7964 (electronic)", ISSN-L = "0167-6423", bibdate = "Fri Apr 1 18:39:40 MDT 2011", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sciencedirect.com/science/journal/01676423", acknowledgement = ack-nhfb, fjournal = "Science of Computer Programming", journal-URL = "http://www.sciencedirect.com/science/journal/01676423", } @Article{Liao:2011:DEM, author = "Wei-keng Liao", title = "Design and Evaluation of {MPI} File Domain Partitioning Methods under Extent-Based File Locking Protocol", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "22", number = "2", pages = "260--272", month = feb, year = "2011", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/TPDS.2010.74", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Fri Feb 25 14:08:57 2011", bibsource 
= "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "IEEE Transactions on Parallel and Distributed Systems", journal-URL = "http://www.computer.org/tpds/archives.htm", } @Article{Lim:2011:ATC, author = "Min Yeol Lim and Vincent W. Freeh and David K. Lowenthal", title = "Adaptive, transparent {CPU} scaling algorithms leveraging inter-node {MPI} communication regions", journal = j-PARALLEL-COMPUTING, volume = "37", number = "10--11", pages = "667--683", month = oct # "\slash " # nov, year = "2011", CODEN = "PACOEJ", DOI = "https://doi.org/10.1016/j.parco.2011.07.001", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Sat Feb 4 15:17:36 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819111000871", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Liu:2011:CBA, author = "Weiguo Liu and Bertil Schmidt and Wolfgang M{\"u}ller-Wittig", title = "{CUDA-BLASTP}: Accelerating {BLASTP} on {CUDA}-Enabled Graphics Hardware", journal = j-TCBB, volume = "8", number = "6", pages = "1678--1684", month = nov, year = "2011", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2011.33", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Sun Nov 6 06:45:50 MST 2011", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Michailidis:2011:PDM, author = "Panagiotis D. Michailidis and Konstantinos G.
Margaritis", title = "Parallel direct methods for solving the system of linear equations with pipelining on a multicore using {OpenMP}", journal = j-J-COMPUT-APPL-MATH, volume = "236", number = "3", pages = "326--341", day = "1", month = sep, year = "2011", CODEN = "JCAMDI", ISSN = "0377-0427 (print), 1879-1778 (electronic)", ISSN-L = "0377-0427", bibdate = "Sat Feb 25 13:24:37 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/jcomputapplmath2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0377042711004183", acknowledgement = ack-nhfb, fjournal = "Journal of Computational and Applied Mathematics", journal-URL = "http://www.sciencedirect.com/science/journal/03770427", } @Article{Mininni:2011:HMO, author = "Pablo D. Mininni and Duane Rosenberg and Raghu Reddy and Annick Pouquet", title = "A hybrid {MPI--OpenMP} scheme for scalable parallel pseudospectral computations for fluid turbulence", journal = j-PARALLEL-COMPUTING, volume = "37", number = "6--7", pages = "316--326", month = jun # "\slash " # jul, year = "2011", CODEN = "PACOEJ", DOI = "https://doi.org/10.1016/j.parco.2011.05.004", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Sat Feb 4 15:17:35 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819111000512", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Mokbel:2011:ASR, author = "Mohammed F. Mokbel and Robert D. 
Kent and Michael Wong", title = "An Abstract Semantically Rich Compiler Collocative and Interpretative Model for {OpenMP} Programs", journal = j-COMP-J, volume = "54", number = "8", pages = "1325--1343", month = aug, year = "2011", CODEN = "CMPJA6", DOI = "https://doi.org/10.1093/comjnl/bxr029", ISSN = "0010-4620 (print), 1460-2067 (electronic)", ISSN-L = "0010-4620", bibdate = "Wed Aug 17 16:34:11 MDT 2011", bibsource = "http://comjnl.oxfordjournals.org/content/54/8.toc; https://www.math.utah.edu/pub/tex/bib/compj2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://comjnl.oxfordjournals.org/content/54/8/1325.full.pdf+html", acknowledgement = ack-nhfb, fjournal = "The Computer Journal", journal-URL = "http://comjnl.oxfordjournals.org/", onlinedate = "April 5, 2011", }

@Article{Pennycook:2011:PAH,
  author =       "S. J. Pennycook and S. D. Hammond and S. A. Jarvis and G. R. Mudalige",
  title =        "Performance analysis of a hybrid {MPI\slash CUDA} implementation of the {NASLU} benchmark",
  journal =      j-SIGMETRICS,
  volume =       "38",
  number =       "4",
  pages =        "23--29",
  month =        mar,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1964218.1964223",
  ISSN =         "0163-5999 (print), 1557-9484 (electronic)",
  ISSN-L =       "0163-5999",
  bibdate =      "Fri Apr 1 23:02:55 MDT 2011",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGMETRICS Performance Evaluation Review",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J618",
  remark =       "Special issue on the 1st international workshop on performance modeling, benchmarking and simulation of high performance computing systems (PMBS 10).",
}

@Article{Peters:2011:FPC,
  author =       "Hagen Peters and Ole Schulz-Hildebrandt and Norbert Luttenberger",
  title =        "Fast in-place, comparison-based sorting with {CUDA}: a study with bitonic sort",
  journal =      j-CCPE,
  volume =       "23",
  number =       "7",
  pages =        "681--693",
  month =        may,
  year =         "2011",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.1686",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Mon Dec 5 10:08:55 MST 2011",
  bibsource =    "http://www.interscience.wiley.com/jpages/1532-0626; https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
  onlinedate =   "14 Jan 2011",
}

@Article{Plimpton:2011:MML,
  author =       "Steven J. Plimpton and Karen D. Devine",
  title =        "{MapReduce} in {MPI} for large-scale graph algorithms",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "37",
  number =       "9",
  pages =        "610--632",
  month =        sep,
  year =         "2011",
  CODEN =        "PACOEJ",
  DOI =          "https://doi.org/10.1016/j.parco.2011.02.004",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Sat Feb 4 15:17:35 MST 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819111000172",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}

@Article{Siegel:2011:AFV,
  author =       "Stephen F. Siegel and Timothy K. Zirkel",
  title =        "Automatic formal verification of {MPI}-based parallel programs",
  journal =      j-SIGPLAN,
  volume =       "46",
  number =       "8",
  pages =        "309--310",
  month =        aug,
  year =         "2011",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2038037.1941603",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Mon Aug 26 14:04:45 MDT 2013",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  note =         "PPoPP '11 Conference proceedings.",
  abstract =     "The Toolkit for Accurate Scientific Software (TASS) is a suite of tools for the formal verification of MPI-based parallel programs used in computational science. TASS can verify various safety properties as well as compare two programs for functional equivalence. The TASS front end takes an integer $ n \geq 1 $ and a C/MPI program, and constructs an abstract model of the program with $n$ processes. Procedures, structs, (multi-dimensional) arrays, heap-allocated data, pointers, and pointer arithmetic are all representable in a TASS model. The model is then explored using symbolic execution and explicit state space enumeration. A number of techniques are used to reduce the time and memory consumed. A variety of realistic MPI programs have been verified with TASS, including Jacobi iteration and manager-worker type programs, and some subtle defects have been discovered. TASS is written in Java and is available from \path=http://vsl.cis.udel.edu/tass= under the Gnu Public License.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
}

@Article{Sintorn:2011:EAF,
  author =       "Erik Sintorn and Ola Olsson and Ulf Assarsson",
  title =        "An efficient alias-free shadow algorithm for opaque and transparent objects using per-triangle shadow volumes",
  journal =      j-TOG,
  volume =       "30",
  number =       "6",
  pages =        "153:1--153:??",
  month =        dec,
  year =         "2011",
  CODEN =        "ATGRDF",
  DOI =          "https://doi.org/10.1145/2070781.2024187",
  ISSN =         "0730-0301 (print), 1557-7368 (electronic)",
  ISSN-L =       "0730-0301",
  bibdate =      "Mon Dec 19 15:59:18 MST 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tog/; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/tog.bib",
  abstract =     "This paper presents a novel method for generating pixel-accurate shadows from point light-sources in real-time. The new method is able to quickly cull pixels that are not in shadow and to trivially accept large chunks of pixels thanks mainly to using the whole triangle shadow volume as a primitive, instead of rendering the shadow quads independently as in the classic Shadow-Volume algorithm. Our CUDA implementation outperforms z-fail consistently and surpasses z-pass at high resolutions, although these latter two are hardware accelerated, while inheriting none of the robustness issues associated with these methods. Another, perhaps even more important property of our algorithm, is that it requires no pre-processing or identification of silhouette edges and so robustly and efficiently handles arbitrary triangle soups.",
  acknowledgement = ack-nhfb,
  articleno =    "153",
  fjournal =     "ACM Transactions on Graphics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J778",
}

@InProceedings{Smelyanskiy:2011:HPL,
  author =       "Mikhail Smelyanskiy and Karthikeyan Vaidyanathan and Jee Choi and B{\'a}lint Jo{\'o} and Jatin Chhugani and Michael A. Clark and Pradeep Dubey",
  title =        "High-performance lattice {QCD} for multi-core based parallel systems using a cache-friendly hybrid threaded-{MPI} approach",
  crossref =     "Lathrop:2011:SPI",
  pages =        "69:1--69:11",
  year =         "2011",
  DOI =          "https://doi.org/10.1145/2063384.2063477",
  bibdate =      "Fri Dec 16 11:05:47 MST 2011",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/supercomputing2011.bib",
  acknowledgement = ack-nhfb,
  articleno =    "69",
}

@Article{Stpiczynski:2011:SKB,
  author =       "Przemyslaw Stpiczy{\'n}ski and Joanna Potiopa",
  title =        "Solving a kind of boundary-value problem for ordinary differential equations using {Fermi} --- The next generation {CUDA} computing architecture",
  journal =      j-J-COMPUT-APPL-MATH,
  volume =       "236",
  number =       "3",
  pages =        "384--393",
  day =          "1",
  month =        sep,
  year =         "2011",
  CODEN =        "JCAMDI",
  ISSN =         "0377-0427 (print), 1879-1778 (electronic)",
  ISSN-L =       "0377-0427",
  bibdate =      "Sat Feb 25 13:24:37 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jcomputapplmath2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0377042711004237",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Computational and Applied Mathematics",
  journal-URL =  "http://www.sciencedirect.com/science/journal/03770427",
}

@Article{Szalay:2011:FCD,
  author =       "Zs{\'o}fia Szalay and J{\'a}nos Rohonczy",
  title =        "Fast calculation of {DNMR} spectra on {CUDA}-enabled graphics card",
  journal =      j-J-COMPUT-CHEM,
  volume =       "32",
  number =       "7",
  pages =        "1262--1270",
  month =        may,
  year =         "2011",
  CODEN =        "JCCHDD",
  DOI =          "https://doi.org/10.1002/jcc.21706",
  ISSN =         "0192-8651 (print), 1096-987X (electronic)",
  ISSN-L =       "0192-8651",
  bibdate =      "Thu Nov 29 14:55:32 MST 2012",
  bibsource =    "http://www.interscience.wiley.com/jpages/0192-8651; https://www.math.utah.edu/pub/tex/bib/jcomputchem2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Computational Chemistry",
  journal-URL =  "http://onlinelibrary.wiley.com/journal/10.1002/(ISSN)1096-987X",
  onlinedate =   "29 Nov 2010",
}

@Article{vanderLaan:2011:AWL,
  author =       "Wladimir J. van der Laan and Andrei C. Jalba and Jos B. T. M. Roerdink",
  title =        "Accelerating Wavelet Lifting on Graphics Hardware Using {CUDA}",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "22",
  number =       "1",
  pages =        "132--146",
  month =        jan,
  year =         "2011",
  CODEN =        "ITDSEO",
  DOI =          "https://doi.org/10.1109/TPDS.2010.143",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Fri Feb 25 14:08:57 2011",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed Systems",
  journal-URL =  "http://www.computer.org/tpds/archives.htm",
}

@Article{Wittenbrink:2011:FGG,
  author =       "Craig M. Wittenbrink and Emmett Kilgariff and Arjun Prabhu",
  title =        "{Fermi GF100 GPU} Architecture",
  journal =      j-IEEE-MICRO,
  volume =       "31",
  number =       "2",
  pages =        "50--59",
  month =        mar # "\slash " # apr,
  year =         "2011",
  CODEN =        "IEMIDZ",
  DOI =          "https://doi.org/10.1109/MM.2011.24",
  ISSN =         "0272-1732 (print), 1937-4143 (electronic)",
  ISSN-L =       "0272-1732",
  bibdate =      "Tue Apr 26 13:50:28 2011",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/hot-chips.bib; https://www.math.utah.edu/pub/tex/bib/ieeemicro.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract =     "The Fermi GF100 is a GPU architecture that provides several new capabilities beyond the Nvidia GT200 or Tesla architecture. The Fermi architecture offers up to 512 CUDA cores and special features for gaming and high-performance computing. This article describes the GPU's new capabilities for tessellation, physics processing, and computational graphics.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Micro",
  journal-URL =  "http://www.computer.org/csdl/mags/mi/index.html",
  keywords =     "Hot Chips 22 conference proceedings",
}

@Article{Wong:2011:EMS,
  author =       "Hon-Cheng Wong and Un-Hong Wong and Xueshang Feng and Zesheng Tang",
  title =        "Efficient magnetohydrodynamic simulations on graphics processing units with {CUDA}",
  journal =      j-COMP-PHYS-COMM,
  volume =       "182",
  number =       "10",
  pages =        "2132--2160",
  month =        oct,
  year =         "2011",
  CODEN =        "CPHCBZ",
  DOI =          "https://doi.org/10.1016/j.cpc.2011.05.011",
  ISSN =         "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L =       "0010-4655",
  bibdate =      "Sat Feb 11 10:11:00 MST 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0010465511001676",
  acknowledgement = ack-nhfb,
  fjournal =     "Computer Physics Communications",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00104655",
}

@Article{Wu:2011:PCH,
  author =       "Xingfu Wu and Valerie Taylor",
  title =        "Performance characteristics of hybrid {MPI\slash OpenMP} implementations of {NAS} parallel benchmarks {SP} and {BT} on large-scale multicore supercomputers",
  journal =      j-SIGMETRICS,
  volume =       "38",
  number =       "4",
  pages =        "56--62",
  month =        mar,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1964218.1964228",
  ISSN =         "0163-5999 (print), 1557-9484 (electronic)",
  ISSN-L =       "0163-5999",
  bibdate =      "Fri Apr 1 23:02:55 MDT 2011",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGMETRICS Performance Evaluation Review",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J618",
  remark =       "Special issue on the 1st international workshop on performance modeling, benchmarking and simulation of high performance computing systems (PMBS 10).",
}

@Article{Yang:2011:HCO,
  author =       "Chao-Tung Yang and Chih-Lin Huang and Cheng-Fang Lin",
  title =        "Hybrid {CUDA}, {OpenMP}, and {MPI} parallel programming on multicore {GPU} clusters",
  journal =      j-COMP-PHYS-COMM,
  volume =       "182",
  number =       "1",
  pages =        "266--269",
  month =        jan,
  year =         "2011",
  CODEN =        "CPHCBZ",
  DOI =          "https://doi.org/10.1016/j.cpc.2010.06.035",
  ISSN =         "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L =       "0010-4655",
  bibdate =      "Sat Feb 11 10:10:55 MST 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0010465510002262",
  acknowledgement = ack-nhfb,
  fjournal =     "Computer Physics Communications",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00104655",
}

@Article{Yang:2011:PBP,
  author =       "Chao-Tung Yang and Chao-Chin Wu and Jen-Hsiang Chang",
  title =        "Performance-based parallel loop self-scheduling using hybrid {OpenMP} and {MPI} programming on multicore {SMP} clusters",
  journal =      j-CCPE,
  volume =       "23",
  number =       "8",
  pages =        "721--744",
  day =          "10",
  month =        jun,
  year =         "2011",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.1627",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Mon Dec 5 10:08:55 MST 2011",
  bibsource =    "http://www.interscience.wiley.com/jpages/1532-0626; https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
  onlinedate =   "26 Sep 2010",
}

@Article{Yilmaz:2011:RMS,
  author =       "Erdal Yilmaz and Eray Molla and Cansin Yildiz and Veysi Isler",
  title =        "Realistic modeling of spectator behavior for soccer videogames with {CUDA}",
  journal =      j-COMPUTERS-AND-GRAPHICS,
  volume =       "35",
  number =       "6",
  pages =        "1063--1069",
  month =        dec,
  year =         "2011",
  CODEN =        "COGRD2",
  DOI =          "https://doi.org/10.1016/j.cag.2011.10.001",
  ISSN =         "0097-8493 (print), 1873-7684 (electronic)",
  ISSN-L =       "0097-8493",
  bibdate =      "Mon Feb 13 16:42:03 MST 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/compgraph.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sciencedirect.com/science/journal/00978493",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0097849311001476",
  acknowledgement = ack-nhfb,
  fjournal =     "Computers \& Graphics",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00978493",
}

@InProceedings{Zhai:2011:CVH,
  author =       "Yan Zhai and Mingliang Liu and Jidong Zhai and Xiaosong Ma and Wenguang Chen",
  title =        "Cloud versus in-house cluster: evaluating {Amazon} cluster compute instances for running {MPI} applications",
  crossref =     "ACM:2011:SSP",
  pages =        "11:1--11:10",
  year =         "2011",
  DOI =          "https://doi.org/10.1145/2063348.2063363",
  bibdate =      "Fri Dec 16 11:19:26 MST 2011",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/supercomputing2011.bib",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  keywords =     "Amazon EC2 CCI; IB cluster (InfiniBand)",
}

@Article{Zheng:2011:GLO,
  author =       "Mai Zheng and Vignesh T. Ravi and Feng Qin and Gagan Agrawal",
  title =        "{GRace}: a low-overhead mechanism for detecting data races in {GPU} programs",
  journal =      j-SIGPLAN,
  volume =       "46",
  number =       "8",
  pages =        "135--146",
  month =        aug,
  year =         "2011",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2038037.1941574",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Mon Aug 26 14:04:45 MDT 2013",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  note =         "PPoPP '11 Conference proceedings.",
  abstract =     "In recent years, GPUs have emerged as an extremely cost-effective means for achieving high performance. Many application developers, including those with no prior parallel programming experience, are now trying to scale their applications using GPUs. While languages like CUDA and OpenCL have eased GPU programming for non-graphical applications, they are still explicitly parallel languages. All parallel programmers, particularly the novices, need tools that can help ensuring the correctness of their programs. Like any multithreaded environment, data races on GPUs can severely affect the program reliability. Thus, tool support for detecting race conditions can significantly benefit GPU application developers. Existing approaches for detecting data races on CPUs or GPUs have one or more of the following limitations: (1) being ill-suited for handling non-lock synchronization primitives on GPUs; (2) lacking of scalability due to the state explosion problem; (3) reporting many false positives because of simplified modeling; and/or (4) incurring prohibitive runtime and space overhead. In this paper, we propose GRace, a new mechanism for detecting races in GPU programs that combines static analysis with a carefully designed dynamic checker for logging and analyzing information at runtime. Our design utilizes GPUs memory hierarchy to log runtime data accesses efficiently. To improve the performance, GRace leverages static analysis to reduce the number of statements that need to be instrumented. Additionally, by exploiting the knowledge of thread scheduling and the execution model in the underlying GPUs, GRace can accurately detect data races with no false positives reported. Based on the above idea, we have built a prototype of GRace with two schemes, i.e., GRace-stmt and GRace-addr, for NVIDIA GPUs. Both schemes are integrated with the same static analysis. We have evaluated GRace-stmt and GRace-addr with three data race bugs in three GPU kernel functions and also have compared them with the existing approach, referred to as B-tool. Our experimental results show that both schemes of GRace are effective in detecting all evaluated cases with no false positives, whereas B-tool reports many false positives for one evaluated case. On the one hand, GRace-addr incurs low runtime overhead, i.e., 22--116\%, and low space overhead, i.e., 9--18MB, for the evaluated kernels. On the other hand, GRace-stmt offers more help in diagnosing data races with larger overhead.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
}

@Article{Agathos:2012:TBE,
  author =       "Spiros N. Agathos and Panagiotis E. Hadjidoukas and Vassilios V. Dimakopoulos",
  title =        "Task-Based Execution of Nested {OpenMP} Loops",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "7312",
  pages =        "210--222",
  year =         "2012",
  CODEN =        "LNCSD9",
  DOI =          "https://doi.org/10.1007/978-3-642-30961-8_16",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Wed Dec 19 15:19:49 MST 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/lncs2012e.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/chapter/10.1007/978-3-642-30961-8_16/",
  acknowledgement = ack-nhfb,
  book-DOI =     "https://doi.org/10.1007/978-3-642-30961-8",
  book-URL =     "http://www.springerlink.com/content/978-3-642-30961-8",
  fjournal =     "Lecture Notes in Computer Science",
}

@Article{Amritkar:2012:OPF,
  author =       "Amit Amritkar and Danesh Tafti and Rui Liu and Rick Kufrin and Barbara Chapman",
  title =        "{OpenMP} parallelism for fluid and fluid-particulate systems",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "38",
  number =       "9",
  pages =        "501--517",
  month =        sep,
  year =         "2012",
  CODEN =        "PACOEJ",
  DOI =          "https://doi.org/10.1016/j.parco.2012.05.005",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Mon Jul 30 14:28:54 MDT 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819112000476",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}

@Article{Andersch:2012:PPE,
  author =       "Michael Andersch and Chi Ching Chi and Ben Juurlink",
  title =        "Programming parallel embedded and consumer applications in {OpenMP} superscalar",
  journal =      j-SIGPLAN,
  volume =       "47",
  number =       "8",
  pages =        "281--282",
  month =        aug,
  year =         "2012",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2370036.2145854",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Wed Sep 12 12:11:57 MDT 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  note =         "PPoPP '12 conference proceedings.",
  abstract =     "In this paper, we evaluate the performance and usability of the parallel programming model OpenMP Superscalar (OmpSs), apply it to 10 different benchmarks and compare its performance with corresponding POSIX threads implementations.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
}

@Misc{Anonymous:2012:CTC,
  author =       "Anonymous",
  title =        "{CUDA Toolkit 5.0 CURAND} Guide",
  howpublished = "Web document",
  year =         "2012",
  bibdate =      "Sat Feb 08 18:16:05 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/prng.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://docs.nvidia.com/cuda/pdf/CURAND_Library.pdf",
  acknowledgement = ack-nhfb,
  keywords =     "random-number generator",
}

@Article{Baskaran:2012:ACO,
  author =       "Muthu Manikandan Baskaran and Nicolas Vasilache and Benoit Meister and Richard Lethin",
  title =        "Automatic communication optimizations through memory reuse strategies",
  journal =      j-SIGPLAN,
  volume =       "47",
  number =       "8",
  pages =        "277--278",
  month =        aug,
  year =         "2012",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2370036.2145852",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Wed Sep 12 12:11:57 MDT 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  note =         "PPoPP '12 conference proceedings.",
  abstract =     "Modern parallel architectures are emerging with sophisticated hardware consisting of hierarchically placed parallel processors and memories. The properties of memories in a system vary wildly, not only quantitatively (size, latency, bandwidth, number of banks) but also qualitatively (scratchpad, cache). Along with the emergence of such architectures comes the need for effectively utilizing the parallel processors and properly managing data movement across memories to improve memory bandwidth and hide data transfer latency. In this paper, we describe some of the high-level optimizations that are targeted at the improvement of memory performance in the R-Stream compiler, a high-level source-to-source automatic parallelizing compiler. We direct our focus in this paper on optimizing communications (data transfers) by improving memory reuse at various levels of an explicit memory hierarchy. This general concept is well-suited to the hardware properties of GPGPUs, which is the architecture that we concentrate on for this paper. We apply our techniques and obtain performance improvement on various stencil kernels including an important iterative stencil kernel in seismic processing applications where the performance is comparable to that of the state-of-the-art implementation of the kernel by a CUDA expert.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
}

@Article{Bawidamann:2012:ETO,
  author =       "Uwe Bawidamann and Marco Nehmeier",
  title =        "Expression Templates and {OpenCL}",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "7204",
  pages =        "71--80",
  year =         "2012",
  CODEN =        "LNCSD9",
  DOI =          "https://doi.org/10.1007/978-3-642-31500-8_8",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Wed Dec 19 15:26:14 MST 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/lncs2012c.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/chapter/10.1007/978-3-642-31500-8_8/",
  acknowledgement = ack-nhfb,
  book-DOI =     "https://doi.org/10.1007/978-3-642-31500-8",
  book-URL =     "http://www.springerlink.com/content/978-3-642-31500-8",
  fjournal =     "Lecture Notes in Computer Science",
}

@Article{Berg:2012:FCL,
  author =       "Bernd A. Berg and Hao Wu",
  title =        "{Fortran} code for {$ {\rm SU}(3) $} lattice gauge theory with and without {MPI} checkerboard parallelization",
  journal =      j-COMP-PHYS-COMM,
  volume =       "183",
  number =       "10",
  pages =        "2145--2157",
  month =        oct,
  year =         "2012",
  CODEN =        "CPHCBZ",
  DOI =          "https://doi.org/10.1016/j.cpc.2012.03.021",
  ISSN =         "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L =       "0010-4655",
  bibdate =      "Thu Jun 28 15:53:26 MDT 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0010465512001269",
  acknowledgement = ack-nhfb,
  fjournal =     "Computer Physics Communications",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00104655",
}

@Article{Bergstrom:2012:NDP,
  author =       "Lars Bergstrom and John Reppy",
  title =        "Nested data-parallelism on the {GPU}",
  journal =      j-SIGPLAN,
  volume =       "47",
  number =       "9",
  pages =        "247--258",
  month =        sep,
  year =         "2012",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2398856.2364563",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Thu Nov 15 16:40:19 MST 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "Graphics processing units (GPUs) provide both memory bandwidth and arithmetic performance far greater than that available on CPUs but, because of their Single-Instruction-Multiple-Data (SIMD) architecture, they are hard to program. Most of the programs ported to GPUs thus far use traditional data-level parallelism, performing only operations that operate uniformly over vectors. NESL is a first-order functional language that was designed to allow programmers to write irregular-parallel programs --- such as parallel divide-and-conquer algorithms --- for wide-vector parallel computers. This paper presents our port of the NESL implementation to work on GPUs and provides empirical evidence that nested data-parallelism (NDP) on GPUs significantly outperforms CPU-based implementations and matches or beats newer GPU languages that support only flat parallelism. While our performance does not match that of hand-tuned CUDA programs, we argue that the notational conciseness of NESL is worth the loss in performance. This work provides the first language implementation that directly supports NDP on a GPU.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "ICFP '12 conference proceedings.",
}

@Article{Berka:2012:PET,
  author =       "Tobias Berka and Helge Hagenauer and Marian Vajter{\v{s}}ic",
  title =        "Portable Explicit Threading and Concurrent Programming for {MPI} Applications",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "7204",
  pages =        "81--90",
  year =         "2012",
  CODEN =        "LNCSD9",
  DOI =          "https://doi.org/10.1007/978-3-642-31500-8_9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Wed Dec 19 15:26:14 MST 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/lncs2012c.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/chapter/10.1007/978-3-642-31500-8_9/",
  acknowledgement = ack-nhfb,
  book-DOI =     "https://doi.org/10.1007/978-3-642-31500-8",
  book-URL =     "http://www.springerlink.com/content/978-3-642-31500-8",
  fjournal =     "Lecture Notes in Computer Science",
}

@Article{Betts:2012:GVG,
  author =       "Adam Betts and Nathan Chong and Alastair Donaldson and Shaz Qadeer and Paul Thomson",
  title =        "{GPUVerify}: a verifier for {GPU} kernels",
  journal =      j-SIGPLAN,
  volume =       "47",
  number =       "10",
  pages =        "113--132",
  month =        oct,
  year =         "2012",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2398857.2384625",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Thu Nov 15 16:40:23 MST 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "We present a technique for verifying race- and divergence-freedom of GPU kernels that are written in mainstream kernel programming languages such as OpenCL and CUDA. Our approach is founded on a novel formal operational semantics for GPU programming termed synchronous, delayed visibility (SDV) semantics. The SDV semantics provides a precise definition of barrier divergence in GPU kernels and allows kernel verification to be reduced to analysis of a sequential program, thereby completely avoiding the need to reason about thread interleavings, and allowing existing modular techniques for program verification to be leveraged. We describe an efficient encoding for data race detection and propose a method for automatically inferring loop invariants required for verification. We have implemented these techniques as a practical verification tool, GPUVerify, which can be applied directly to OpenCL and CUDA source code. We evaluate GPUVerify with respect to a set of 163 kernels drawn from public and commercial sources. Our evaluation demonstrates that GPUVerify is capable of efficient, automatic verification of a large number of real-world kernels.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "OOPSLA '12 conference proceedings.",
}

@Article{Bihari:2012:CIT,
  author =       "Barna L. Bihari and Michael Wong and Amy Wang and Bronis R. de Supinski and Wang Chen",
  title =        "A Case for Including Transactions in {OpenMP} {II}: Hardware Transactional Memory",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "7312",
  pages =        "44--58",
  year =         "2012",
  CODEN =        "LNCSD9",
  DOI =          "https://doi.org/10.1007/978-3-642-30961-8_4",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Wed Dec 19 15:19:49 MST 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/lncs2012e.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/chapter/10.1007/978-3-642-30961-8_4/",
  acknowledgement = ack-nhfb,
  book-DOI =     "https://doi.org/10.1007/978-3-642-30961-8",
  book-URL =     "http://www.springerlink.com/content/978-3-642-30961-8",
  fjournal =     "Lecture Notes in Computer Science",
}

@Article{Blattner:2012:PSC,
  author =       "Timothy Blattner and Shiming Yang",
  title =        "Performance study on {CUDA GPUs} for parallelizing the local ensemble transformed {Kalman} filter algorithm",
  journal =      j-CCPE,
  volume =       "24",
  number =       "2",
  pages =        "167--177",
  month =        feb,
  year =         "2012",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.1859",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Wed Apr 4 09:18:00 MDT 2012",
  bibsource =    "http://www.interscience.wiley.com/jpages/1532-0626; https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
  onlinedate =   "17 Oct 2011",
}

@Article{Broquedis:2012:LEO,
  author =       "Fran{\c{c}}ois Broquedis and Thierry Gautier and Vincent Danjean",
  title =        "{libOMP}, an Efficient {OpenMP} Runtime System for Both Fork-Join and Data Flow Paradigms",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "7312",
  pages =        "102--115",
  year =         "2012",
  CODEN =        "LNCSD9",
  DOI =          "https://doi.org/10.1007/978-3-642-30961-8_8",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Wed Dec 19 15:19:49 MST 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/lncs2012e.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/chapter/10.1007/978-3-642-30961-8_8/",
  acknowledgement = ack-nhfb,
  book-DOI =     "https://doi.org/10.1007/978-3-642-30961-8",
  book-URL =     "http://www.springerlink.com/content/978-3-642-30961-8",
  fjournal =     "Lecture Notes in Computer Science",
}

@Article{Bruning:2012:MFT,
  author =       "Ulrich Br{\"u}ning",
  title =        "{MPI} Functions and Their Impact on Interconnect Hardware",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "7490",
  pages =        "10--10",
  year =         "2012",
  CODEN =        "LNCSD9",
  DOI =          "https://doi.org/10.1007/978-3-642-33518-1_2",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Wed Dec 19 15:23:42 MST 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/lncs2012h.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/chapter/10.1007/978-3-642-33518-1_2/",
  acknowledgement = ack-nhfb,
  book-DOI =     "https://doi.org/10.1007/978-3-642-33518-1",
  book-URL =     "http://www.springerlink.com/content/978-3-642-33518-1",
  fjournal =     "Lecture Notes in Computer Science",
}

@Article{Bureddy:2012:OGM,
  author =       "D. Bureddy and H. Wang and A. Venkatesh and S. Potluri and D. K. Panda",
  title =        "{OMB-GPU}: a Micro-Benchmark Suite for Evaluating {MPI} Libraries on {GPU} Clusters",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "7490",
  pages =        "110--120",
  year =         "2012",
  CODEN =        "LNCSD9",
  DOI =          "https://doi.org/10.1007/978-3-642-33518-1_16",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Wed Dec 19 15:23:42 MST 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/lncs2012h.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/chapter/10.1007/978-3-642-33518-1_16/",
  acknowledgement = ack-nhfb,
  book-DOI =     "https://doi.org/10.1007/978-3-642-33518-1",
  book-URL =     "http://www.springerlink.com/content/978-3-642-33518-1",
  fjournal =     "Lecture Notes in Computer Science",
}

@Article{Busa:2012:ACO,
  author =       "J{\'a}n {Busa, Jr.} and Shura Hayryan and Ming-Chya Wu and J{\'a}n Busa and Chin-Kun Hu",
  title =        "{ARVO-CL}: the {OpenCL} version of the {ARVO} package --- An efficient tool for computing the accessible surface area and the excluded volume of proteins via analytical equations",
  journal =      j-COMP-PHYS-COMM,
  volume =       "183",
  number =       "11",
  pages =        "2494--2497",
  month =        nov,
  year =         "2012",
  CODEN =        "CPHCBZ",
  DOI =          "https://doi.org/10.1016/j.cpc.2012.04.019",
  ISSN =         "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L =       "0010-4655",
  bibdate =      "Fri Jul 27 07:00:54 MDT 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0010465512001580",
  acknowledgement = ack-nhfb,
  fjournal =     "Computer Physics Communications",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00104655",
}

@Article{Bustamam:2012:FPM,
  author =       "Alhadi Bustamam and Kevin Burrage and Nicholas A. Hamilton",
  title =        "Fast Parallel {Markov} Clustering in Bioinformatics Using Massively Parallel Computing on {GPU} with {CUDA} and {ELLPACK-R} Sparse Format",
  journal =      j-TCBB,
  volume =       "9",
  number =       "3",
  pages =        "679--692",
  month =        may,
  year =         "2012",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.68",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Apr 19 17:58:10 MDT 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Markov clustering (MCL) is becoming a key algorithm within bioinformatics for determining clusters in networks. However, with increasing vast amount of data on biological networks, performance and scalability issues are becoming a critical limiting factor in applications. Meanwhile, GPU computing, which uses CUDA tool for implementing a massively parallel computing environment in the GPU card, is becoming a very powerful, efficient, and low-cost option to achieve substantial performance gains over CPU approaches. The use of on-chip memory on the GPU is efficiently lowering the latency time, thus, circumventing a major issue in other parallel computing environments, such as MPI. We introduce a very fast Markov clustering algorithm using CUDA (CUDA-MCL) to perform parallel sparse matrix-matrix computations and parallel sparse Markov matrix normalizations, which are at the heart of MCL. We utilized ELLPACK-R sparse format to allow the effective and fine-grain massively parallel processing to cope with the sparse nature of interaction networks data sets in bioinformatics applications. As the results show, CUDA-MCL is significantly faster than the original MCL running on CPU. Thus, large-scale parallel computation on off-the-shelf desktop-machines, that were previously only possible on supercomputing architectures, can significantly change the way bioinformaticians and biologists deal with their data.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Cabarle:2012:SNP,
  author =       "Francis George C. Cabarle and Henry Adorna and Miguel A. Mart{\'\i}nez",
  title =        "A Spiking Neural {P} System Simulator Based on {CUDA}",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "7184",
  pages =        "87--103",
  year =         "2012",
  CODEN =        "LNCSD9",
  DOI =          "https://doi.org/10.1007/978-3-642-28024-5_8",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Wed Dec 19 15:25:48 MST 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/lncs2012b.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/chapter/10.1007/978-3-642-28024-5_8/",
  acknowledgement = ack-nhfb,
  book-DOI =     "https://doi.org/10.1007/978-3-642-28024-5",
  book-URL =     "http://www.springerlink.com/content/978-3-642-28024-5",
  fjournal =     "Lecture Notes in Computer Science",
}

@Article{Calotoiu:2012:PID, author = "Alexandru Calotoiu and Christian Siebert and Felix Wolf", title = "Pattern-Independent Detection of Manual Collectives in {MPI} Programs", journal = j-LECT-NOTES-COMP-SCI, volume = "7484", pages = "28--39", year = "2012", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-642-32820-6_5", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Wed Dec 19 15:23:34 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs2012h.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/chapter/10.1007/978-3-642-32820-6_5/", acknowledgement = ack-nhfb, book-DOI = "https://doi.org/10.1007/978-3-642-32820-6", book-URL =
"http://www.springerlink.com/content/978-3-642-32820-6", fjournal = "Lecture Notes in Computer Science", } @Article{Cecilia:2012:CSC, author = "Jos{\'e} Mar{\'\i}a Cecilia and Jos{\'e} Manuel Garc{\'\i}a and Manuel Ujald{\'o}n", title = "{CUDA $2$D} Stencil Computations for the {Jacobi} Method", journal = j-LECT-NOTES-COMP-SCI, volume = "7133", pages = "173--183", year = "2012", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-642-28151-8_17", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Wed Dec 19 15:24:46 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs2012b.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/chapter/10.1007/978-3-642-28151-8_17/", acknowledgement = ack-nhfb, book-DOI = "https://doi.org/10.1007/978-3-642-28151-8", book-URL = "http://www.springerlink.com/content/978-3-642-28151-8", fjournal = "Lecture Notes in Computer Science", } @Article{Chen:2012:PUA, author = "Yifeng Chen and Xiang Cui and Hong Mei", title = "{PARRAY}: a unifying array representation for heterogeneous parallelism", journal = j-SIGPLAN, volume = "47", number = "8", pages = "171--180", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145838", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "This paper introduces a programming interface called PARRAY (or Parallelizing ARRAYs) that supports system-level succinct programming for heterogeneous parallel systems like GPU clusters. The current practice of software development requires combining several low-level libraries like Pthread, OpenMP, CUDA and MPI. Achieving productivity and portability is hard with different numbers and models of GPUs. 
PARRAY extends mainstream C programming with novel array types of distinct features: (1) the dimensions of an array type are nested in a tree, conceptually reflecting the memory hierarchy; (2) the definition of an array type may contain references to other array types, allowing sophisticated array types to be created for parallelization; (3) threads also form arrays that allow programming in a Single-Program-Multiple-Codeblock (SPMC) style to unify various sophisticated communication patterns. This leads to shorter, more portable and maintainable parallel codes, while the programmer still has control over performance-related features necessary for deep manual optimization. Although the source-to-source code generator only faithfully generates low-level library calls according to the type information, higher-level programming and automatic performance optimization are still possible through building libraries of sub-programs on top of PARRAY. The case study on cluster FFT illustrates a simple 30-line code that 2x outperforms Intel Cluster MKL on the Tianhe-1A system with 7168 Fermi GPUs and 14336 CPUs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", } @Article{Chevitarese:2012:STN, author = "Daniel Salles Chevitarese and Dilza Szwarcman and Marley Vellasco", title = "Speeding Up the Training of Neural Networks with {CUDA} Technology", journal = j-LECT-NOTES-COMP-SCI, volume = "7267", pages = "30--38", year = "2012", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-642-29347-4_4", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Wed Dec 19 15:18:50 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs2012d.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/chapter/10.1007/978-3-642-29347-4_4/", acknowledgement = ack-nhfb, book-DOI = "https://doi.org/10.1007/978-3-642-29347-4", book-URL = 
"http://www.springerlink.com/content/978-3-642-29347-4", fjournal = "Lecture Notes in Computer Science", } @Article{Collingbourne:2012:STO, author = "Peter Collingbourne and Cristian Cadar and Paul H. J. Kelly", title = "Symbolic Testing of {OpenCL} Code", journal = j-LECT-NOTES-COMP-SCI, volume = "7261", pages = "203--218", year = "2012", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-642-34188-5_18", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Wed Dec 19 15:18:42 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs2012d.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/chapter/10.1007/978-3-642-34188-5_18/", acknowledgement = ack-nhfb, book-DOI = "https://doi.org/10.1007/978-3-642-34188-5", book-URL = "http://www.springerlink.com/content/978-3-642-34188-5", fjournal = "Lecture Notes in Computer Science", } @InProceedings{Cui:2012:OOB, author = "Zheng Cui and Lei Xia and Patrick G. Bridges and Peter A. Dinda and John R. Lange", title = "Optimizing overlay-based virtual networking through optimistic interrupts and cut-through forwarding", crossref = "Hollingsworth:2012:SPI", pages = "99:1--99:??", year = "2012", bibdate = "Thu Nov 15 07:38:35 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/supercomputing2012.bib", URL = "http://conferences.computer.org/sc/2012/papers/1000a029.pdf", abstract = "Overlay-based virtual networking provides a powerful model for realizing virtual distributed and parallel computing systems with strong isolation, portability, and recoverability properties. However, in extremely high throughput and low latency networks, such overlays can suffer from bandwidth and latency limitations, which is of particular concern if we want to apply the model in HPC environments. 
Through careful study of an existing very high performance overlay-based virtual network system, we have identified two core issues limiting performance: delayed and/or excessive virtual interrupt delivery into guests, and copies between host and guest data buffers done during encapsulation. We respond with two novel optimizations: optimistic, timer-free virtual interrupt injection, and zero-copy cut-through data forwarding. These optimizations improve the latency and bandwidth of the overlay network on 10 Gbps interconnects, resulting in near-native performance for a wide range of microbenchmarks and MPI application benchmarks.", acknowledgement = ack-nhfb, articleno = "99", } @Article{Danalis:2012:MCT, author = "Anthony Danalis", title = "{MPI} and Compiler Technology: a Love-Hate Relationship", journal = j-LECT-NOTES-COMP-SCI, volume = "7490", pages = "12--13", year = "2012", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-642-33518-1_4", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Wed Dec 19 15:23:42 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs2012h.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/accesspage/chapter/10.1007/978-3-642-33518-1_4", acknowledgement = ack-nhfb, book-DOI = "https://doi.org/10.1007/978-3-642-33518-1", book-URL = "http://www.springerlink.com/content/978-3-642-33518-1", fjournal = "Lecture Notes in Computer Science", } @Article{delaAsuncion:2012:MCI, author = "Marc de la Asunci{\'o}n and Jos{\'e} M. Mantas and Manuel J. Castro and E. D. 
Fern{\'a}ndez-Nieto", title = "An {MPI-CUDA} implementation of an improved {Roe} method for two-layer shallow water systems", journal = j-J-PAR-DIST-COMP, volume = "72", number = "9", pages = "1065--1072", month = sep, year = "2012", CODEN = "JPDCER", DOI = "https://doi.org/10.1016/j.jpdc.2011.07.012", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Fri Jul 27 06:43:44 MDT 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sciencedirect.com/science/journal/07437315", URL = "http://www.sciencedirect.com/science/article/pii/S074373151100147X", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Article{Deshpande:2012:AGC, author = "Vivek Deshpande and Xing Wu and Frank Mueller", title = "Auto-generation of communication benchmark traces", journal = j-SIGMETRICS, volume = "40", number = "2", pages = "99--105", month = sep, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2381056.2381078", ISSN = "0163-5999 (print), 1557-9484 (electronic)", ISSN-L = "0163-5999", bibdate = "Fri Nov 9 11:06:40 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigmetrics.bib", abstract = "Benchmarks are essential for evaluating HPC hardware and software for petascale machines and beyond. But benchmark creation is a tedious manual process. As a result, benchmarks tend to lag behind the development of complex scientific codes. Our work automates the creation of communication benchmarks. Given an MPI application, we utilize ScalaTrace, a lossless and scalable framework to trace communication operations and execution time while abstracting away the computations. A single trace file that reflects the behavior of all nodes is subsequently expanded to C source code by a novel code generator. 
This resulting benchmark code is compact, portable, human-readable, and accurately reflects the original application's communication characteristics and performance. Experimental results demonstrate that generated source code of benchmarks preserves both the communication patterns and the run-time behavior of the original application. Such automatically generated benchmarks not only shorten the transition from application development to benchmark extraction but also facilitate code obfuscation, which is essential for benchmark extraction from commercial and restricted applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGMETRICS Performance Evaluation Review", journal-URL = "http://portal.acm.org/toc.cfm?id=J618", } @Article{Deuzeman:2012:LMP, author = "Albert Deuzeman and Siebren Reker and Carsten Urbach and {ETM Collaboration}", title = "{Lemon}: An {MPI} parallel {I/O} library for data encapsulation using {LIME}", journal = j-COMP-PHYS-COMM, volume = "183", number = "6", pages = "1321--1335", month = jun, year = "2012", CODEN = "CPHCBZ", DOI = "https://doi.org/10.1016/j.cpc.2012.01.016", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Wed Feb 29 07:07:40 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465512000318", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Diaz:2012:CCF, author = "M. J. Castro D{\'\i}az and E. 
Fern{\'a}ndez-Nieto", title = "A Class of Computationally Fast First Order Finite Volume Solvers: {PVM} Methods", journal = j-SIAM-J-SCI-COMP, volume = "34", number = "4", pages = "A2173--A2196", month = "????", year = "2012", CODEN = "SJOCE3", DOI = "https://doi.org/10.1137/100795280", ISSN = "1064-8275 (print), 1095-7197 (electronic)", ISSN-L = "1064-8275", bibdate = "Tue Oct 30 14:49:05 MDT 2012", bibsource = "http://epubs.siam.org/sam-bin/dbq/toc/SISC/34/4; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/siamjscicomput.bib", acknowledgement = ack-nhfb, fjournal = "SIAM Journal on Scientific Computing", journal-URL = "http://epubs.siam.org/sisc", onlinedate = "January 2012", } @Article{Didelot:2012:IMC, author = "Sylvain Didelot and Patrick Carribault and Marc P{\'e}rache and William Jalby", title = "Improving {MPI} Communication Overlap with Collaborative Polling", journal = j-LECT-NOTES-COMP-SCI, volume = "7490", pages = "37--46", year = "2012", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-642-33518-1_9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Wed Dec 19 15:23:42 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs2012h.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/chapter/10.1007/978-3-642-33518-1_9/", acknowledgement = ack-nhfb, book-DOI = "https://doi.org/10.1007/978-3-642-33518-1", book-URL = "http://www.springerlink.com/content/978-3-642-33518-1", fjournal = "Lecture Notes in Computer Science", } @Article{Dinan:2012:EMC, author = "James Dinan and David Goodell and William Gropp and Rajeev Thakur and Pavan Balaji", title = "Efficient Multithreaded Context {ID} Allocation in {MPI}", journal = j-LECT-NOTES-COMP-SCI, volume = "7490", pages = "57--66", year = "2012", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-642-33518-1_11", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", 
bibdate = "Wed Dec 19 15:23:42 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs2012h.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/chapter/10.1007/978-3-642-33518-1_11/", acknowledgement = ack-nhfb, book-DOI = "https://doi.org/10.1007/978-3-642-33518-1", book-URL = "http://www.springerlink.com/content/978-3-642-33518-1", fjournal = "Lecture Notes in Computer Science", } @Article{Du:2012:COT, author = "Peng Du and Rick Weber and Piotr Luszczek and Stanimire Tomov and Gregory Peterson and Jack Dongarra", title = "From {CUDA} to {OpenCL}: Towards a performance-portable solution for multi-platform {GPU} programming", journal = j-PARALLEL-COMPUTING, volume = "38", number = "8", pages = "391--407", month = aug, year = "2012", CODEN = "PACOEJ", DOI = "https://doi.org/10.1016/j.parco.2011.10.002", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Wed Jun 20 17:04:05 MDT 2012", bibsource = "https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819111001335", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Dziubak:2012:OOI, author = "Tomasz Dziubak and Jacek Matulewski", title = "An object-oriented implementation of a solver of the time-dependent {Schr{\"o}dinger} equation using the {CUDA} technology", journal = j-COMP-PHYS-COMM, volume = "183", number = "3", pages = "800--812", month = mar, year = "2012", CODEN = "CPHCBZ", DOI = "https://doi.org/10.1016/j.cpc.2011.11.026", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Sat Feb 11 10:11:02 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL 
= "http://www.sciencedirect.com/science/article/pii/S0010465511003948", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Eichenberger:2012:DOT, author = "Alexandre E. Eichenberger and Christian Terboven and Michael Wong and Dieter an Mey", title = "The Design of {OpenMP} Thread Affinity", journal = j-LECT-NOTES-COMP-SCI, volume = "7312", pages = "15--28", year = "2012", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-642-30961-8_2", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Wed Dec 19 15:19:49 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs2012e.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/chapter/10.1007/978-3-642-30961-8_2/", acknowledgement = ack-nhfb, book-DOI = "https://doi.org/10.1007/978-3-642-30961-8", book-URL = "http://www.springerlink.com/content/978-3-642-30961-8", fjournal = "Lecture Notes in Computer Science", } @Article{ElZein:2012:GOC, author = "Ahmed H. {El Zein} and Alistair P. 
Rendell", title = "Generating optimal {CUDA} sparse matrix--vector product implementations for evolving {GPU} hardware", journal = j-CCPE, volume = "24", number = "1", pages = "3--13", month = jan, year = "2012", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.1732", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Mon Jan 16 12:11:17 MST 2012", bibsource = "http://www.interscience.wiley.com/jpages/1532-0626; https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "17 Apr 2011", } @InProceedings{Fiala:2012:DCS, author = "David Fiala and Frank Mueller and Christian Engelmann and Rolf Riesen and Kurt Ferreira and Ron Brightwell", title = "Detection and correction of silent data corruption for large-scale high-performance computing", crossref = "Hollingsworth:2012:SPI", pages = "78:1--78:??", year = "2012", bibdate = "Thu Nov 15 07:38:35 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/supercomputing2012.bib", URL = "http://conferences.computer.org/sc/2012/papers/1000a046.pdf", abstract = "Faults have become the norm rather than the exception for high-end computing clusters. Exacerbating this situation, some of these faults remain undetected, manifesting themselves as silent errors that allow applications to compute incorrect results. This paper studies the potential for redundancy to detect and correct soft errors in MPI message-passing applications while investigating the challenges inherent to detecting soft errors within MPI applications by providing transparent MPI redundancy. 
By assuming a model wherein corruption in application data manifests itself by producing differing MPI messages between replicas, we study the best suited protocols for detecting and correcting corrupted MPI messages. Using our fault injector, we observe that even a single error can have profound effects on applications by causing a cascading pattern of corruption which in most cases spreads to all other processes. Results indicate that our consistency protocols can successfully protect applications experiencing even high rates of silent data corruption.", acknowledgement = ack-nhfb, articleno = "78", } @Article{Filgueira:2012:DCD, author = "Rosa Filgueira and Jes{\'u}s Carretero and David E. Singh and Alejandro Calder{\'o}n and Alberto N{\'u}{\~n}ez", title = "{Dynamic--CoMPI}: dynamic optimization techniques for {MPI} parallel applications", journal = j-J-SUPERCOMPUTING, volume = "59", number = "1", pages = "361--391", month = jan, year = "2012", CODEN = "JOSUED", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Tue Dec 13 15:25:33 MST 2011", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=59&issue=1; https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=59&issue=1&spage=361", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Article{Garcia:2012:DLB, author = "Marta Garcia and Julita Corbalan and Rosa Maria Badia and Jesus Labarta", title = "A Dynamic Load Balancing Approach with {SMPSuperscalar} and {MPI}", journal = j-LECT-NOTES-COMP-SCI, volume = "7174", pages = "10--23", year = "2012", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-642-30397-5_2", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Wed Dec 19 15:25:38 MST 2012", 
bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs2012b.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/chapter/10.1007/978-3-642-30397-5_2/", acknowledgement = ack-nhfb, book-DOI = "https://doi.org/10.1007/978-3-642-30397-5", book-URL = "http://www.springerlink.com/content/978-3-642-30397-5", fjournal = "Lecture Notes in Computer Science", } @InProceedings{Garland:2012:DUP, author = "Michael Garland and Manjunath Kudlur and Yili Zheng", title = "Designing a unified programming model for heterogeneous machines", crossref = "Hollingsworth:2012:SPI", pages = "67:1--67:??", year = "2012", bibdate = "Thu Nov 15 07:38:35 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/supercomputing2012.bib", URL = "http://conferences.computer.org/sc/2012/papers/1000a064.pdf", abstract = "While high-efficiency machines are increasingly embracing heterogeneous architectures and massive multithreading, contemporary mainstream programming languages reflect a mental model in which processing elements are homogeneous, concurrency is limited, and memory is a flat undifferentiated pool of storage. Moreover, the current state of the art in programming heterogeneous machines tends towards using separate programming models, such as OpenMP and CUDA, for different portions of the machine. Both of these factors make programming emerging heterogeneous machines unnecessarily difficult. We describe the design of the Phalanx programming model, which seeks to provide a unified programming model for heterogeneous machines. It provides constructs for bulk parallelism, synchronization, and data placement which operate across the entire machine. 
Our prototype implementation is able to launch and coordinate work on both CPU and GPU processors within a single node, and by leveraging the GASNet runtime, is able to run across all the nodes of a distributed-memory machine.", acknowledgement = ack-nhfb, articleno = "67", } @Article{Ghosh:2012:RAA, author = "Sudeep Ghosh and Jason Hiser and Jack W. Davidson", title = "Replacement attacks against {VM}-protected applications", journal = j-SIGPLAN, volume = "47", number = "7", pages = "203--214", month = jul, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2365864.2151051", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Sep 6 10:01:03 MDT 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "VEE '12 conference proceedings.", abstract = "Process-level virtualization is increasingly being used to enhance the security of software applications from reverse engineering and unauthorized modification (called software protection). Process-level virtual machines (PVMs) can safeguard the application code at run time and hamper the adversary's ability to launch dynamic attacks on the application. This dynamic protection, combined with its flexibility, ease in handling legacy systems and low performance overhead, has made process-level virtualization a popular approach for providing software protection. While there has been much research on using process-level virtualization to provide such protection, there has been less research on attacks against PVM-protected software. In this paper, we describe an attack on applications protected using process-level virtualization, called a replacement attack. In a replacement attack, the adversary replaces the protecting PVM with an attack VM thereby rendering the application vulnerable to analysis and modification. 
We present a general description of the replacement attack methodology and two attack implementations against a protected application using freely available tools. The generality and simplicity of replacement attacks demonstrates that there is a strong need to develop techniques that meld applications more tightly to the protecting PVM to prevent such attacks.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", } @Article{Gong:2012:OCN, author = "Yifan Gong and Bingsheng He and Jianlong Zhong", title = "An overview of {CMPI}: network performance aware {MPI} in the cloud", journal = j-SIGPLAN, volume = "47", number = "8", pages = "297--298", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145862", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "Cloud computing enables users to perform distributed computing tasks on many virtual machines, without owning a physical cluster. Recently, various distributed computing tasks such as scientific applications are being moved from supercomputers and private clusters to public clouds. Message passing interface (MPI) is a key and common component in distributed computing tasks. The virtualized computing environment of the public cloud hides the network topology information from the users, and existing topology-aware optimizations for MPI are no longer feasible in the cloud environment. We propose a network performance aware MPI library named CMPI. CMPI embraces a new model for capturing the network performance among different virtual machines in the cloud. 
Based on the network performance model, we develop novel network performance aware algorithms for communication operations. This poster gives an overview of CMPI design, and presents some preliminary results on collective operations such as broadcast. We demonstrate the effectiveness of our network performance aware optimizations on Amazon EC2.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", } @Article{Gravvanis:2012:SFD, author = "G. A. Gravvanis and C. K. Filelis-Papadopoulos and K. M. Giannoutakis", title = "Solving finite difference linear systems on {GPUs}: {CUDA} based Parallel Explicit Preconditioned Biconjugate Conjugate Gradient type Methods", journal = j-J-SUPERCOMPUTING, volume = "61", number = "3", pages = "590--604", month = sep, year = "2012", CODEN = "JOSUED", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Fri Oct 26 07:41:53 MDT 2012", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=61&issue=3; https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=61&issue=3&spage=590", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Article{Gropp:2012:AMI, author = "William Gropp and Ewing Lusk and Rajeev Thakur", title = "Advanced {MPI} Including New {MPI-3} Features", journal = j-LECT-NOTES-COMP-SCI, volume = "7490", pages = "14--14", year = "2012", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-642-33518-1_5", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Wed Dec 19 15:23:42 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs2012h.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL =
"http://link.springer.com/accesspage/chapter/10.1007/978-3-642-33518-1_5", acknowledgement = ack-nhfb, book-DOI = "https://doi.org/10.1007/978-3-642-33518-1", book-URL = "http://www.springerlink.com/content/978-3-642-33518-1", fjournal = "Lecture Notes in Computer Science", } @Article{Gropp:2012:MBW, author = "William Gropp", title = "{MPI 3} and Beyond: Why {MPI} Is Successful and What Challenges It Faces", journal = j-LECT-NOTES-COMP-SCI, volume = "7490", pages = "1--9", year = "2012", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-642-33518-1_1", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Wed Dec 19 15:23:42 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs2012h.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/chapter/10.1007/978-3-642-33518-1_1/", acknowledgement = ack-nhfb, book-DOI = "https://doi.org/10.1007/978-3-642-33518-1", book-URL = "http://www.springerlink.com/content/978-3-642-33518-1", fjournal = "Lecture Notes in Computer Science", } @Article{Hermanns:2012:SDM, author = "Marc-Andr{\'e} Hermanns and Markus Geimer and Bernd Mohr and Felix Wolf", title = "Scalable detection of {MPI-2} remote memory access inefficiency patterns", journal = j-IJHPCA, volume = "26", number = "3", pages = "227--236", month = aug, year = "2012", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342011406758", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Thu Nov 8 11:31:14 MST 2012", bibsource = "http://hpc.sagepub.com/content/26/3.toc; https://www.math.utah.edu/pub/tex/bib/ijsa.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://hpc.sagepub.com/content/26/3/227.full.pdf+html", acknowledgement = ack-nhfb, fjournal = "International Journal of High Performance Computing Applications", journal-URL = "http://hpc.sagepub.com/content/by/year", onlinedate = "June 8, 2011", } @InProceedings{Hilbrich:2012:MRE, author = 
"Tobias Hilbrich and Joachim Protze and Martin Schulz and Bronis R. de Supinski and Matthias S. M{\"u}ller", title = "{MPI} runtime error detection with {MUST}: advances in deadlock detection", crossref = "Hollingsworth:2012:SPI", pages = "30:1--30:??", year = "2012", bibdate = "Thu Nov 15 07:38:35 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/supercomputing2012.bib", URL = "http://conferences.computer.org/sc/2012/papers/1000a010.pdf", abstract = "The widely used Message Passing Interface (MPI) is complex and rich. As a result, application developers require automated tools to avoid and to detect MPI programming errors. We present the Marmot Umpire Scalable Tool (MUST) that detects such errors with significantly increased scalability. We present improvements to our graph-based deadlock detection approach for MPI, which cover future MPI extensions. Our enhancements also check complex MPI constructs that no previous graph-based detection approach handled correctly. Finally, we present optimizations for the processing of MPI operations that reduce runtime deadlock detection overheads. Existing approaches often require O ( p ) analysis time per MPI operation, for p processes. We empirically observe that our improvements lead to sub-linear or better analysis time per operation for a wide range of real world applications.", acknowledgement = ack-nhfb, articleno = "30", } @Article{Hoefler:2012:LMO, author = "Torsten Hoefler and James Dinan and Darius Buntinas and Pavan Balaji and Brian W. 
Barrett", title = "Leveraging {MPI}'s One-Sided Communication Interface for Shared-Memory Programming", journal = j-LECT-NOTES-COMP-SCI, volume = "7490", pages = "132--141", year = "2012", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-642-33518-1_18", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Wed Dec 19 15:23:42 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs2012h.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/chapter/10.1007/978-3-642-33518-1_18/", acknowledgement = ack-nhfb, book-DOI = "https://doi.org/10.1007/978-3-642-33518-1", book-URL = "http://www.springerlink.com/content/978-3-642-33518-1", fjournal = "Lecture Notes in Computer Science", } @InProceedings{Hoefler:2012:OPC, author = "Torsten Hoefler and Timo Schneider", title = "Optimization principles for collective neighborhood communications", crossref = "Hollingsworth:2012:SPI", pages = "98:1--98:??", year = "2012", bibdate = "Thu Nov 15 07:38:35 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/supercomputing2012.bib", URL = "http://conferences.computer.org/sc/2012/papers/1000a028.pdf", abstract = "Many scientific applications operate in a bulk-synchronous mode of iterative communication and computation steps. Even though the communication steps happen at the same logical time, important patterns such as stencil computations cannot be expressed as collective communications in MPI. We demonstrate how neighborhood collective operations allow to specify arbitrary collective communication relations during run-time and enable optimizations similar to traditional collective calls. We show a number of optimization opportunities and algorithms for different communication scenarios. We also show how users can assert constraints that provide additional optimization opportunities in a portable way. 
We demonstrate the utility of all described optimizations in a highly optimized implementation of neighborhood collective operations. Our communication and protocol optimizations result in a performance improvement of up to a factor of two for small stencil communications. We found that, for some patterns, our optimization heuristics automatically generate communication schedules that are comparable to hand-tuned collectives. With those optimizations in place, we are able to accelerate arbitrary collective communication patterns, such as regular and irregular stencils with optimization methods for collective communications. We expect that our methods will influence the design of future MPI libraries and provide a significant performance benefit on large-scale systems.", acknowledgement = ack-nhfb, articleno = "98", } @Article{Hori:2012:EKL, author = "Atsushi Hori and Toyohisa Kameyama and Yuichi Tsujita and Mitaro Namiki and Yutaka Ishikawa", title = "An Efficient Kernel-Level Blocking {MPI} Implementation", journal = j-LECT-NOTES-COMP-SCI, volume = "7490", pages = "153--162", year = "2012", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-642-33518-1_20", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Wed Dec 19 15:23:42 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs2012h.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/chapter/10.1007/978-3-642-33518-1_20/", acknowledgement = ack-nhfb, book-DOI = "https://doi.org/10.1007/978-3-642-33518-1", book-URL = "http://www.springerlink.com/content/978-3-642-33518-1", fjournal = "Lecture Notes in Computer Science", } @Article{Hormati:2012:SPS, author = "Amir H. 
Hormati and Mehrzad Samadi and Mark Woh and Trevor Mudge and Scott Mahlke", title = "{Sponge}: portable stream programming on graphics engines", journal = j-SIGPLAN, volume = "47", number = "4", pages = "381--392", month = apr, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2248487.1950409", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:03 MDT 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Graphics processing units (GPUs) provide a low cost platform for accelerating high performance computations. The introduction of new programming languages, such as CUDA and OpenCL, makes GPU programming attractive to a wide variety of programmers. However, programming GPUs is still a cumbersome task for two primary reasons: tedious performance optimizations and lack of portability. First, optimizing an algorithm for a specific GPU is a time-consuming task that requires a thorough understanding of both the algorithm and the underlying hardware. Unoptimized CUDA programs typically only achieve a small fraction of the peak GPU performance. Second, GPU code lacks efficient portability as code written for one GPU can be inefficient when executed on another. Moving code from one GPU to another while maintaining the desired performance is a non-trivial task often requiring significant modifications to account for the hardware differences. In this work, we propose Sponge, a compilation framework for GPUs using synchronous data flow streaming languages. Sponge is capable of performing a wide variety of optimizations to generate efficient code for graphics engines. Sponge alleviates the problems associated with current GPU programming methods by providing portability across different generations of GPUs and CPUs, and a better abstraction of the hardware details, such as the memory hierarchy and threading model. 
Using streaming, we provide a write-once software paradigm and rely on the compiler to automatically create optimized CUDA code for a wide variety of GPU targets. Sponge's compiler optimizations improve the performance of the baseline CUDA implementations by an average of 3.2x.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "ASPLOS '12 conference proceedings.", } @Article{Hosking:2012:CHL, author = "Christophe Dubach and Perry Cheng and Rodric Rabbah and David F. Bacon and Stephen J. Fink", title = "Compiling a high-level language for {GPUs}: (via language support for architectures and compilers)", journal = j-SIGPLAN, volume = "47", number = "6", pages = "1--12", month = jun, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345156.2254066", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:49 MDT 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2000.bib; https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PLDI '12 proceedings.", abstract = "Languages such as OpenCL and CUDA offer a standard interface for general-purpose programming of GPUs. However, with these languages, programmers must explicitly manage numerous low-level details involving communication and synchronization. This burden makes programming GPUs difficult and error-prone, rendering these powerful devices inaccessible to most programmers. We desire a higher-level programming model that makes GPUs more accessible while also effectively exploiting their computational power. This paper presents features of Lime, a new Java-compatible language targeting heterogeneous systems, that allow an optimizing compiler to generate high quality GPU code.
The key insight is that the language type system enforces isolation and immutability invariants that allow the compiler to optimize for a GPU without heroic compiler analysis. Our compiler attains GPU speedups between 75\% and 140\% of the performance of native OpenCL code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", } @Article{Hursey:2012:AFA, author = "Joshua Hursey and Richard L. Graham", title = "Analyzing fault aware collective performance in a process fault tolerant {MPI}", journal = j-PARALLEL-COMPUTING, volume = "38", number = "1--2", pages = "15--25", month = jan # "\slash " # feb, year = "2012", CODEN = "PACOEJ", DOI = "https://doi.org/10.1016/j.parco.2011.10.010", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Sat Feb 4 15:17:36 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819111001414", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Book{Hwu:2012:GCG, editor = "Wen-mei Hwu", title = "{GPU} computing gems", publisher = "Morgan Kaufmann", address = "Boston, MA", edition = "Jade", pages = "xvi + 541 + 16", year = "2012", ISBN = "0-12-385963-8 (hardback)", ISBN-13 = "978-0-12-385963-1 (hardback)", LCCN = "T385 .G6875 2012", bibdate = "Sat Feb 8 18:16:05 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/elefunt.bib; https://www.math.utah.edu/pub/tex/bib/matlab.bib; https://www.math.utah.edu/pub/tex/bib/prng.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; z3950.loc.gov:7090/Voyager", series = "Applications of GPU computing series", abstract = "Since the introduction of CUDA in 2007, more than 100 million computers with CUDA capable GPUs have been shipped to end users. 
GPU computing application developers can now expect their application to have a mass market. With the introduction of OpenCL in 2010, researchers can now expect to develop GPU applications that can run on hardware from multiple vendors.", acknowledgement = ack-nhfb, subject = "Graphics processing units; Programming; Imaging systems; Computer graphics; Image processing; Digital techniques", tableofcontents = "Part 1: Parallel Algorithms and Data Structures --- Paulius Micikevicius, NVIDIA \\ 1 Large-Scale GPU Search \\ 2 Edge v. Node Parallelism for Graph Centrality Metrics \\ 3 Optimizing parallel prefix operations for the Fermi architecture \\ 4 Building an Efficient Hash Table on the GPU \\ 5 An Efficient CUDA Algorithm for the Maximum Network Flow Problem \\ 6 On Improved Memory Access Patterns for Cellular Automata Using CUDA \\ 7 Fast Minimum Spanning Tree Computation on Large Graphs \\ 8 Fast in-place sorting with CUDA based on bitonic sort \\ Part 2: Numerical Algorithms --- Frank Jargstorff, NVIDIA \\ 9 Interval Arithmetic in CUDA \\ 10 Approximating the erfinv Function \\ 11 A Hybrid Method for Solving Tridiagonal Systems on the GPU \\ 12 LU Decomposition in CULA \\ 13 GPU Accelerated Derivative-free Optimization \\ Part 3: Engineering Simulation --- Peng Wang, NVIDIA \\ 14 Large-scale gas turbine simulations on GPU clusters \\ 15 GPU acceleration of rarefied gas dynamic simulations \\ 16 Assembly of Finite Element Methods on Graphics Processors \\ 17 CUDA implementation of Vertex-Centered, Finite Volume CFD methods on Unstructured Grids with Flow Control Applications \\ 18 Solving Wave Equations on Unstructured Geometries \\ 19 Fast electromagnetic integral equation solvers on graphics processing units (GPUs) \\ Part 4: Interactive Physics for Games and Engineering Simulation --- Richard Tonge, NVIDIA \\ 20 Solving Large Multi-Body Dynamics Problems on the GPU \\ 21 Implicit FEM Solver in CUDA \\ 22 Real-time Adaptive GPU multi-agent path planning \\ Part 
5: Computational Finance --- Thomas Bradley, NVIDIA \\ 23 High performance finite difference PDE solvers on GPUs for financial option pricing \\ 24 Identifying and Mitigating Credit Risk using Large-scale Economic Capital Simulations \\ 25 Financial Market Value-at-Risk Estimation using the Monte Carlo Method \\ Part 6: Programming Tools and Techniques --- Cliff Wooley, NVIDIA \\ 26 Thrust: A Productivity-Oriented Library for CUDA \\ 27 GPU Scripting and Code Generation with PyCUDA \\ 28 Jacket: GPU Powered MATLAB Acceleration \\ 29 Accelerating Development and Execution Speed with Just In Time GPU Code Generation \\ 30 GPU Application Development, Debugging, and Performance Tuning with GPU Ocelot \\ 31 Abstraction for AoS and SoA Layout in C++ \\ 32 Processing Device Arrays with C++ Metaprogramming \\ 33 GPU Metaprogramming: A Case Study in Biologically-Inspired Machine Vision \\ 34 A Hybridization Methodology for High-Performance Linear Algebra Software for GPUs \\ 35 Dynamic Load Balancing using Work-Stealing \\ 36 Applying software-managed caching and CPU/GPU task scheduling for accelerating dynamic workloads", } @Article{Jiang:2012:OSP, author = "Lei Jiang and Pragneshkumar B. Patel and George Ostrouchov and Ferdinand Jamitzky", title = "{OpenMP}-style parallelism in data-centered multicore computing with {R}", journal = j-SIGPLAN, volume = "47", number = "8", pages = "335--336", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145882", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "R$^1$ is a domain specific language widely used for data analysis by the statistics community as well as by researchers in finance, biology, social sciences, and many other disciplines. 
As R programs are linked to input data, the exponential growth of available data makes high-performance computing with R imperative. To ease the process of writing parallel programs in R, code transformation from a sequential program to a parallel version would bring much convenience to R users. In this paper, we present our work in semi-automatic parallelization of R codes with user-added OpenMP-style pragmas. While such pragmas are used at the frontend, we take advantage of multiple parallel backends with different R packages. We provide flexibility for importing parallelism with plug-in components, impose built-in MapReduce for data processing, and also maintain code reusability. We illustrate the advantage of the on-the-fly mechanisms which can lead to significant applications in data-centered parallel computing.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", } @Article{Johnson:2012:FOL, author = "Tim Johnson and Pierre Fite-Georgel and Rahul Raguram and Jan-Michael Frahm", title = "Fast Organization of Large Photo Collections Using {CUDA}", journal = j-LECT-NOTES-COMP-SCI, volume = "6554", pages = "463--476", year = "2012", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-642-35740-4_36", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Mon Dec 24 08:20:14 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs2012a.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/content/pdf/10.1007/978-3-642-35740-4_36", acknowledgement = ack-nhfb, book-DOI = "https://doi.org/10.1007/978-3-642-35740-4", book-URL = "http://www.springerlink.com/content/978-3-642-35740-4", fjournal = "Lecture Notes in Computer Science", } @Article{Kakimoto:2012:PCG, author = "Takeshi Kakimoto and Keisuke Dohi and Yuichiro Shibata and Kiyoshi Oguri", title = "Performance comparison of {GPU} programming frameworks with the striped 
{Smith--Waterman} algorithm", journal = j-COMP-ARCH-NEWS, volume = "40", number = "5", pages = "70--75", month = dec, year = "2012", CODEN = "CANED2", DOI = "https://doi.org/10.1145/2460216.2460229", ISSN = "0163-5964 (print), 1943-5851 (electronic)", ISSN-L = "0163-5964", bibdate = "Sun May 5 09:49:56 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigarch.bib", note = "HEART '12 conference proceedings.", abstract = "This paper evaluates and discusses how different GPU programming frameworks affect the performance obtained from GPU acceleration of the striped Smith--Waterman algorithm used for biological sequence alignment. A total of 6 GPU implementations of the algorithm on NVIDIA GT200b and AMD RV870 using the CUDA and the OpenCL frameworks are compared to analyze cons and pros of explicit descriptions for architecture specific hardware mechanisms in the code. The evaluation results show that the primitive descriptions with the CUDA are still efficient especially for small size data, while better instruction scheduling and optimizations are carried out by the OpenCL compiler. On the other hand, the combination of OpenCL and RV870 which provides a relatively simple view of the architecture is efficient for the large data size.", acknowledgement = ack-nhfb, fjournal = "ACM SIGARCH Computer Architecture News", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J89", } @Article{Kanal:2012:MMC, author = "M. E. Kanal and M.
Demiralp", title = "A modified method of calculating {High Dimensional Model Representation (HDMR) Terms} for parallelization with {MPI} and {CUDA}", journal = j-J-SUPERCOMPUTING, volume = "62", number = "1", pages = "199--213", month = oct, year = "2012", CODEN = "JOSUED", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Fri Oct 26 07:42:33 MDT 2012", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=62&issue=1; https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=62&issue=1&spage=199", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Article{Kanal:2012:PAI, author = "M. E. Kanal", title = "Parallel algorithm on inversion for adjacent pentadiagonal matrices with {MPI}", journal = j-J-SUPERCOMPUTING, volume = "59", number = "2", pages = "1071--1078", month = feb, year = "2012", CODEN = "JOSUED", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Fri Apr 6 17:44:43 MDT 2012", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=59&issue=2; https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=59&issue=2&spage=1071", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Article{Karrenberg:2012:IPO, author = "Ralf Karrenberg and Sebastian Hack", title = "Improving Performance of {OpenCL} on {CPUs}", journal = j-LECT-NOTES-COMP-SCI, volume = "7210", pages = "1--20", year = "2012", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-642-28652-0_1", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = 
"0302-9743", bibdate = "Wed Dec 19 15:26:22 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs2012c.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/chapter/10.1007/978-3-642-28652-0_1/", acknowledgement = ack-nhfb, book-DOI = "https://doi.org/10.1007/978-3-642-28652-0", book-URL = "http://www.springerlink.com/content/978-3-642-28652-0", fjournal = "Lecture Notes in Computer Science", } @Article{Kim:2012:OUP, author = "Jungwon Kim and Sangmin Seo and Jun Lee and Jeongho Nah and Gangwon Jo and Jaejin Lee", title = "{OpenCL} as a unified programming model for heterogeneous {CPU\slash GPU} clusters", journal = j-SIGPLAN, volume = "47", number = "8", pages = "299--300", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145863", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "In this paper, we propose an OpenCL framework for heterogeneous CPU/GPU clusters, and show that the framework achieves both high performance and ease of programming. The framework provides an illusion of a single system for the user. It allows the application to utilize multiple heterogeneous compute devices, such as multicore CPUs and GPUs, in a remote node as if they were in a local node. No communication API, such as the MPI library, is required in the application source. 
We implement the OpenCL framework and evaluate its performance on a heterogeneous CPU/GPU cluster that consists of one host node and nine compute nodes using eleven OpenCL benchmark applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", } @Article{Kjolstad:2012:ADG, author = "Fredrik Kjolstad and Torsten Hoefler and Marc Snir", title = "Automatic datatype generation and optimization", journal = j-SIGPLAN, volume = "47", number = "8", pages = "327--328", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145878", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "Many high performance applications spend considerable time packing noncontiguous data into contiguous communication buffers. MPI Datatypes provide an alternative by describing noncontiguous data layouts. This allows sophisticated hardware to retrieve data directly from application data structures. However, packing codes in real-world applications are often complex and specifying equivalent datatypes is difficult, time-consuming, and error prone. We present an algorithm that automates the transformation. We have implemented the algorithm in a tool that transforms packing code to MPI Datatypes, and evaluated it by transforming 90 packing codes from the NAS Parallel Benchmarks. 
The transformation allows easy porting of applications to new machines that benefit from datatypes, thus improving programmer productivity.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", } @Article{Klemm:2012:EOV, author = "Michael Klemm and Alejandro Duran and Xinmin Tian and Hideki Saito and Diego Caballero", title = "Extending {OpenMP*} with Vector Constructs for Modern Multicore {SIMD} Architectures", journal = j-LECT-NOTES-COMP-SCI, volume = "7312", pages = "59--72", year = "2012", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-642-30961-8_5", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Wed Dec 19 15:19:49 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs2012e.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/chapter/10.1007/978-3-642-30961-8_5/", acknowledgement = ack-nhfb, book-DOI = "https://doi.org/10.1007/978-3-642-30961-8", book-URL = "http://www.springerlink.com/content/978-3-642-30961-8", fjournal = "Lecture Notes in Computer Science", } @Article{Klockner:2012:PPS, author = "Andreas Kl{\"o}ckner and Nicolas Pinto and Yunsup Lee and Bryan Catanzaro and Paul Ivanov and Ahmed Fasih", title = "{PyCUDA} and {PyOpenCL}: a scripting-based approach to {GPU} run-time code generation", journal = j-PARALLEL-COMPUTING, volume = "38", number = "3", pages = "157--174", month = mar, year = "2012", CODEN = "PACOEJ", DOI = "https://doi.org/10.1016/j.parco.2011.09.001", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Sat Feb 4 15:17:36 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819111001281", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = 
"http://www.sciencedirect.com/science/journal/01678191", } @Article{Kwon:2012:HAO, author = "Okwan Kwon and Fahed Jubair and Rudolf Eigenmann and Samuel Midkiff", title = "A hybrid approach of {OpenMP} for clusters", journal = j-SIGPLAN, volume = "47", number = "8", pages = "75--84", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145827", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "We present the first fully automated compiler-runtime system that successfully translates and executes OpenMP shared-address-space programs on laboratory-size clusters, for the complete set of regular, repetitive applications in the NAS Parallel Benchmarks. We introduce a hybrid compiler-runtime translation scheme. Compared to previous work, this scheme features a new runtime data flow analysis and new compiler techniques for improving data affinity and reducing communication costs. We present and discuss the performance of our translated programs, and compare them with the performance of the MPI, HPF and UPC versions of the benchmarks. 
The results show that our translated programs achieve 75\% of the hand-coded MPI programs, on average.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", } @Article{Lashuk:2012:MPA, author = "Ilya Lashuk and Aparna Chandramowlishwaran and Harper Langston and Tuan-Anh Nguyen and Rahul Sampath and Aashay Shringarpure and Richard Vuduc and Lexing Ying and Denis Zorin and George Biros", title = "A massively parallel adaptive fast multipole method on heterogeneous architectures", journal = j-CACM, volume = "55", number = "5", pages = "101--109", month = may, year = "2012", CODEN = "CACMA2", DOI = "https://doi.org/10.1145/2160718.2160740", ISSN = "0001-0782 (print), 1557-7317 (electronic)", ISSN-L = "0001-0782", bibdate = "Wed May 9 07:19:14 MDT 2012", bibsource = "http://www.acm.org/pubs/contents/journals/cacm/; https://www.math.utah.edu/pub/bibnet/subjects/fastmultipole.bib; https://www.math.utah.edu/pub/tex/bib/cacm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "We describe a parallel fast multipole method (FMM) for highly nonuniform distributions of particles. We employ both distributed memory parallelism (via MPI) and shared memory parallelism (via OpenMP and GPU acceleration) to rapidly evaluate two-body nonoscillatory potentials in three dimensions on heterogeneous high performance computing architectures. We have performed scalability tests with up to 30 billion particles on 196,608 cores on the AMD/CRAY-based Jaguar system at ORNL. On a GPU-enabled system (NSF's Keeneland at Georgia Tech/ORNL), we observed 30$ \times $ speedup over a single core CPU and 7$ \times $ speedup over a multicore CPU implementation. 
By combining GPUs with MPI, we achieve less than 10 ns/particle and six digits of accuracy for a run with 48 million nonuniformly distributed particles on 192 GPUs.", acknowledgement = ack-nhfb, fjournal = "Communications of the ACM", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J79", } @InProceedings{Lee:2012:EED, author = "Seyong Lee and Jeffrey S. Vetter", title = "Early evaluation of directive-based {GPU} programming models for productive exascale computing", crossref = "Hollingsworth:2012:SPI", pages = "23:1--23:??", year = "2012", bibdate = "Thu Nov 15 07:38:35 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/supercomputing2012.bib", URL = "http://conferences.computer.org/sc/2012/papers/1000a051.pdf", abstract = "Graphics Processing Unit (GPU)-based parallel computer architectures have shown increased popularity as a building block for high performance computing, and possibly for future Exascale computing. However, their programming complexity remains as a major hurdle for their widespread adoption. To provide better abstractions for programming GPU architectures, researchers and vendors have proposed several directive-based GPU programming models. These directive-based models provide different levels of abstraction, and require different levels of programming effort to port and optimize applications. Understanding these differences among these new models provides valuable insights on their applicability and performance potential. In this paper, we evaluate existing directive-based models by porting thirteen application kernels from various scientific domains to use CUDA GPUs, which, in turn, allows us to identify important issues in the functionality, scalability, tunability, and debuggability of the existing models.
Our evaluation shows that directive-based models can achieve reasonable performance, compared to hand-written GPU codes.", acknowledgement = ack-nhfb, articleno = "23", } @InProceedings{Lee:2012:SMO, author = "Jaejin Lee", editor = "????", booktitle = "{ATIP '12: Proceedings of the ATIP\slash A*CRC Workshop on Accelerator Technologies for High-Performance Computing: Does Asia Lead the Way?}", title = "{SnuCL} and an {MPI $+$ OpenCL} implementation of {HPL} on heterogeneous {CPU\slash GPU} clusters", publisher = pub-ACM, address = pub-ACM:adr, pages = "??--??", year = "2012", ISBN = "1-4503-1644-1", ISBN-13 = "978-1-4503-1644-6", LCCN = "????", bibdate = "Wed Nov 14 11:00:18 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, remark = "Publisher has only PDF of 28 lecture slides", } @InProceedings{Levesque:2012:HEA, author = "John M. Levesque and Ramanan Sankaran and Ray Grout", title = "Hybridizing {S3D} into an exascale application using {OpenACC}: an approach for moving to multi-petaflops and beyond", crossref = "Hollingsworth:2012:SPI", pages = "15:1--15:??", year = "2012", bibdate = "Thu Nov 15 07:38:35 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/supercomputing2012.bib", URL = "http://conferences.computer.org/sc/2012/papers/1000a040.pdf", abstract = "Hybridization is the process of converting an application with a single level of parallelism to an application with multiple levels of parallelism. Over the past 15 years a majority of the applications that run on High Performance Computing systems have employed MPI for all of the parallelism within the application. In the Peta-Exascale computing regime, effective utilization of the hardware requires multiple levels of parallelism matched to the macro architecture of the system to achieve good performance. 
A hybridized code base is performance portable when sufficient parallelism is expressed in an architecture agnostic form to achieve good performance on a range of available systems. The hybridized S3D code is performance portable across today's leading many core and GPU accelerated systems. The OpenACC framework allows a unified code base to be deployed for either (Manycore CPU or Manycore CPU+GPU) while permitting architecture specific optimizations to expose new dimensions of parallelism to be utilized.", acknowledgement = ack-nhfb, articleno = "15", } @InProceedings{Li:2012:PFA, author = "Peng Li and Guodong Li and Ganesh Gopalakrishnan", title = "Parametric flows: automated behavior equivalencing for symbolic analysis of races in {CUDA} programs", crossref = "Hollingsworth:2012:SPI", pages = "29:1--29:??", year = "2012", bibdate = "Thu Nov 15 07:38:35 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/supercomputing2012.bib", URL = "http://conferences.computer.org/sc/2012/papers/1000a009.pdf", abstract = "The growing scale of concurrency requires automated abstraction techniques to cut down the effort in concurrent system analysis. In this paper, we show that the high degree of behavioral symmetry present in GPU programs allows CUDA race detection to be dramatically simplified through abstraction. Our abstraction technique is one of automatically creating parametric flows---control-flow equivalence classes of threads that diverge in the same manner---and checking for data races only across a pair of threads per parametric flow. 
We have implemented this approach as an extension of our recently proposed GKLEE symbolic analysis framework and show that all our previous results are dramatically improved in that (i) the parametric flow-based analysis takes far less time, and (ii) because of the much higher scalability of the analysis, we can detect even more data race situations that were previously missed by GKLEE because it was forced to downscale examples to limit analysis complexity. Moreover, the parametric flow-based analysis is applicable to other programs with SPMD models.", acknowledgement = ack-nhfb, articleno = "29", } @Article{Lima:2012:PEO, author = "Antonio M. Lima and Marco A. S. Netto and Thais Webber and Ricardo M. Czekster and Cesar A. F. {De Rose} and Paulo Fernandes", title = "Performance evaluation of {OpenMP}-based algorithms for handling {Kronecker} descriptors", journal = j-J-PAR-DIST-COMP, volume = "72", number = "5", pages = "678--692", month = may, year = "2012", CODEN = "JPDCER", DOI = "https://doi.org/10.1016/j.jpdc.2012.02.001", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Wed Mar 28 08:37:48 MDT 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sciencedirect.com/science/journal/07437315", URL = "http://www.sciencedirect.com/science/article/pii/S0743731512000354", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Article{Ling:2012:HPP, author = "Cheng Ling and Khaled Benkrid and Tsuyoshi Hamada", title = "High performance phylogenetic analysis on {CUDA}-compatible {GPUs}", journal = j-COMP-ARCH-NEWS, volume = "40", number = "5", pages = "52--57", month = dec, year = "2012", CODEN = "CANED2", DOI = "https://doi.org/10.1145/2460216.2460226", ISSN = "0163-5964 (print), 1943-5851 (electronic)", ISSN-L = "0163-5964", bibdate = "Sun May 5 
09:49:56 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigarch.bib", note = "HEART '12 conference proceedings.", abstract = "The operation of phylogenetic analysis aims to investigate the evolution and relationships among species. It is widely used in the fields of system biology and comparative genomics. However, phylogenetic analysis is also a computationally intensive operation as the number of tree topology grows in a factorial way with the number of species involved. Therefore, due to the large number of species in the real world, the computational burden has largely thwarted phylogenetic reconstruction. In this paper, we describe the detailed GPU-based multi-threaded design and implementation of a Markov Chain Monte Carlo (MCMC) maximum likelihood algorithm for phylogenetic analysis on a set of aligned nucleotide sequences. The implementation is based on the framework of the most widely used phylogenetic analysis tool, namely MrBayes. 
The proposed approach resulted in 6x-8x speed-up on an NVidia Geforce 460 GTX GPU compared to an optimized GPP-based software implementation running on a desktop computer with a single Intel Xeon 2.53 GHz CPU and 6.0 GB RAM.", acknowledgement = ack-nhfb, fjournal = "ACM SIGARCH Computer Architecture News", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J89", } @Article{Maheo:2012:AOL, author = "Aur{\`e}le Mah{\'e}o and Souad Kolia{\"\i} and Patrick Carribault and Marc P{\'e}rache and William Jalby", title = "Adaptive {OpenMP} for Large {NUMA} Nodes", journal = j-LECT-NOTES-COMP-SCI, volume = "7312", pages = "254--257", year = "2012", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-642-30961-8_20", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Wed Dec 19 15:19:49 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs2012e.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/chapter/10.1007/978-3-642-30961-8_20/", acknowledgement = ack-nhfb, book-DOI = "https://doi.org/10.1007/978-3-642-30961-8", book-URL = "http://www.springerlink.com/content/978-3-642-30961-8", fjournal = "Lecture Notes in Computer Science", } @Article{Mainland:2012:EHM, author = "Geoffrey Mainland", title = "Explicitly heterogeneous metaprogramming with {MetaHaskell}", journal = j-SIGPLAN, volume = "47", number = "9", pages = "311--322", month = sep, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2398856.2364572", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Nov 15 16:40:19 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Languages with support for metaprogramming, like MetaOCaml, offer a principled approach to code generation by guaranteeing that well-typed metaprograms produce well-typed programs. 
However, many problem domains where metaprogramming can fruitfully be applied require generating code in languages like C, CUDA, or assembly. Rather than resorting to ad-hoc code generation techniques, these applications should be directly supported by explicitly heterogeneous metaprogramming languages. We present MetaHaskell, an extension of Haskell 98 that provides modular syntactic and type system support for type safe metaprogramming with multiple object languages. Adding a new object language to MetaHaskell requires only minor modifications to the host language to support type-level quantification over object language types and propagation of type equality constraints. We demonstrate the flexibility of our approach through three object languages: a core ML language, a linear variant of the core ML language, and a subset of C. All three languages support metaprogramming with open terms and guarantee that well-typed MetaHaskell programs will only produce closed object terms that are well-typed. The essence of MetaHaskell is captured in a type system for a simplified metalanguage. 
MetaHaskell, as well as all three object languages, are fully implemented in the mhc bytecode compiler.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "ICFP '12 conference proceedings.", } @Article{Malits:2012:ELG, author = "Roman Malits and Evgeny Bolotin and Avinoam Kolodny and Avi Mendelson", title = "Exploring the limits of {GPGPU} scheduling in control flow bound applications", journal = j-TACO, volume = "8", number = "4", pages = "29:1--29:??", month = jan, year = "2012", CODEN = "????", DOI = "https://doi.org/10.1145/2086696.2086708", ISSN = "1544-3566 (print), 1544-3973 (electronic)", ISSN-L = "1544-3566", bibdate = "Sat Jan 21 07:49:49 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/taco.bib", abstract = "GPGPUs are optimized for graphics, for that reason the hardware is optimized for massively data parallel applications characterized by predictable memory access patterns and little control flow. For such applications, e.g., matrix multiplication, GPGPU based system can achieve very high performance. However, many general purpose data parallel applications are characterized as having intensive control flow and unpredictable memory access patterns. Optimizing the code in such problems for current hardware is often ineffective and even impractical since it exhibits low hardware utilization leading to relatively low performance. 
This work tracks the root causes of execution inefficacies when running control flow intensive CUDA applications on NVIDIA GPGPU hardware.", acknowledgement = ack-nhfb, articleno = "29", fjournal = "ACM Transactions on Architecture and Code Optimization (TACO)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J924", } @Article{Marongiu:2012:OCE, author = "Andrea Marongiu and Luca Benini", title = "An {OpenMP} Compiler for Efficient Use of Distributed Scratchpad Memory in {MPSoCs}", journal = j-IEEE-TRANS-COMPUT, volume = "61", number = "2", pages = "222--236", month = feb, year = "2012", CODEN = "ITCOB4", DOI = "https://doi.org/10.1109/TC.2010.199", ISSN = "0018-9340 (print), 1557-9956 (electronic)", ISSN-L = "0018-9340", bibdate = "Fri Jan 13 17:55:10 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranscomput2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "IEEE Transactions on Computers", journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=12", } @Article{Martins:2012:PDC, author = "Wellington S. Martins and Thiago F. 
Rangel", title = "Phylogenetic Distance Computation Using {CUDA}", journal = j-LECT-NOTES-COMP-SCI, volume = "7409", pages = "168--178", year = "2012", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-642-31927-3_15", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Wed Dec 19 15:21:56 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs2012g.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/chapter/10.1007/978-3-642-31927-3_15/", acknowledgement = ack-nhfb, book-DOI = "https://doi.org/10.1007/978-3-642-31927-3", book-URL = "http://www.springerlink.com/content/978-3-642-31927-3", fjournal = "Lecture Notes in Computer Science", } @Article{Massetto:2012:NSB, author = "Francisco Isidro Massetto and Liria Matsumoto Sato and Kuan-Ching Li", title = "A novel strategy for building interoperable {MPI} environment in heterogeneous high performance systems", journal = j-J-SUPERCOMPUTING, volume = "60", number = "1", pages = "87--116", month = apr, year = "2012", CODEN = "JOSUED", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Fri Apr 6 17:45:24 MDT 2012", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=60&issue=1; https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=60&issue=1&spage=87", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Article{Mehta:2012:SPE, author = "Kshitij Mehta and Edgar Gabriel and Barbara Chapman", title = "Specification and Performance Evaluation of Parallel {I/O} Interfaces for {OpenMP}", journal = j-LECT-NOTES-COMP-SCI, volume = "7312", pages = "1--14", year = "2012", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-642-30961-8_1", ISSN = "0302-9743 (print), 
1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Wed Dec 19 15:19:49 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs2012e.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/chapter/10.1007/978-3-642-30961-8_1/", acknowledgement = ack-nhfb, book-DOI = "https://doi.org/10.1007/978-3-642-30961-8", book-URL = "http://www.springerlink.com/content/978-3-642-30961-8", fjournal = "Lecture Notes in Computer Science", } @Article{Mittal:2012:CAS, author = "Anshul Mittal and Nikhil Jain and Thomas George and Yogish Sabharwal and Sameer Kumar", title = "Collective algorithms for sub-communicators", journal = j-SIGPLAN, volume = "47", number = "8", pages = "315--316", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145872", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "Collective communication over a group of processors is an integral and time consuming component in many HPC applications. Many modern day supercomputers are based on torus interconnects. On such systems, for an irregular communicator comprising of a subset of processors, the algorithms developed so far are not contention free in general and hence non-optimal. In this paper, we present a novel contention-free algorithm to perform collective operations over a subset of processors in a torus network. We also extend previous work on regular communicators to handle special cases of irregular communicators that occur frequently in parallel scientific applications. 
For the generic case where multiple node disjoint sub-communicators communicate simultaneously in a loosely synchronous fashion, we propose a novel cooperative approach to route the data for individual sub-communicators without contention. Empirical results demonstrate that our algorithms outperform the optimized MPI collective implementation on IBM's Blue Gene/P supercomputer for large data sizes and random node distributions.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", } @Article{Muller:2012:SOA, author = "Matthias S. M{\"u}ller and John Baron and William C. Brantley and Huiyu Feng and Daniel Hackenberg", title = "{SPEC OMP2012} --- An Application Benchmark Suite for Parallel Systems Using {OpenMP}", journal = j-LECT-NOTES-COMP-SCI, volume = "7312", pages = "223--236", year = "2012", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-642-30961-8_17", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Wed Dec 19 15:19:49 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs2012e.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/chapter/10.1007/978-3-642-30961-8_17/", acknowledgement = ack-nhfb, book-DOI = "https://doi.org/10.1007/978-3-642-30961-8", book-URL = "http://www.springerlink.com/content/978-3-642-30961-8", fjournal = "Lecture Notes in Computer Science", } @Article{Neuberger:2012:MIS, author = "John M. Neuberger and N{\'a}ndor Sieben and James W. 
Swift", title = "An {MPI} Implementation of a Self-Submitting Parallel Job Queue", journal = j-INT-J-PARALLEL-PROG, volume = "40", number = "4", pages = "443--464", month = aug, year = "2012", CODEN = "IJPPE5", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Fri Oct 26 07:12:55 MDT 2012", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=40&issue=4; https://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=40&issue=4&spage=443", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", } @Article{Ng:2012:STT, author = "Nicholas Ng and Nobuko Yoshida and Xin Yu Niu and Kuen Hung Tsoi", title = "Session types: towards safe and fast reconfigurable programming", journal = j-COMP-ARCH-NEWS, volume = "40", number = "5", pages = "22--27", month = dec, year = "2012", CODEN = "CANED2", DOI = "https://doi.org/10.1145/2460216.2460221", ISSN = "0163-5964 (print), 1943-5851 (electronic)", ISSN-L = "0163-5964", bibdate = "Sun May 5 09:49:56 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigarch.bib", note = "HEART '12 conference proceedings.", abstract = "This paper introduces a new programming framework based on the theory of session types for safe, reconfigurable parallel designs. We apply the session type theory to C and Java programming languages and demonstrate that the session-based languages can offer a clear and tractable framework to describe communications between parallel components and guarantee communication-safety and deadlock-freedom by compile-time type checking. 
Many representative communication topologies such as a ring or scatter-gather can be programmed and verified in session-based programming languages. Case studies involving N-body simulation and Kmeans clustering are used to illustrate the session-based programming style and to demonstrate that the session-based languages perform competitively against MPI counterparts in an FPGA-based heterogeneous cluster, as well as the potential of integrating them with FPGA acceleration.", acknowledgement = ack-nhfb, fjournal = "ACM SIGARCH Computer Architecture News", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J89", } @InProceedings{Nguyen:2012:BTM, author = "Tan Nguyen and Pietro Cicotti and Eric Bylaska and Dan Quinlan and Scott B. Baden", title = "{Bamboo}: translating {MPI} applications to a latency-tolerant, data-driven form", crossref = "Hollingsworth:2012:SPI", pages = "39:1--39:??", year = "2012", bibdate = "Thu Nov 15 07:38:35 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/supercomputing2012.bib", URL = "http://conferences.computer.org/sc/2012/papers/1000a032.pdf", abstract = "We present Bamboo, a custom source-to-source translator that transforms MPI C source into a data-driven form that automatically overlaps communication with available computation. Running on up to 98304 processors of NERSC's Hopper system, we observe that Bamboo's overlap capability speeds up MPI implementations of a 3D Jacobi iterative solver and Cannon's matrix multiplication. Bamboo's generated code meets or exceeds the performance of hand optimized MPI, which includes split-phase coding, the method classically employed to hide communication. 
We achieved our results with only modest amounts of programmer annotation and no intrusive reprogramming of the original application source.", acknowledgement = ack-nhfb, articleno = "39", } @Article{Nguyen:2012:SCS, author = "Donald Nguyen and Keshav Pingali", title = "Synthesizing concurrent schedulers for irregular algorithms", journal = j-SIGPLAN, volume = "47", number = "4", pages = "333--344", month = apr, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2248487.1950404", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 7 08:15:03 MDT 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Scheduling is the assignment of tasks or activities to processors for execution, and it is an important concern in parallel programming. Most prior work on scheduling has focused either on static scheduling of applications in which the dependence graph is known at compile-time or on dynamic scheduling of independent loop iterations such as in OpenMP. In irregular algorithms, dependences between activities are complex functions of runtime values so these algorithms are not amenable to compile-time analysis nor do they consist of independent activities. Moreover, the amount of work can vary dramatically with the scheduling policy. To handle these complexities, implementations of irregular algorithms employ carefully handcrafted, algorithm-specific schedulers but these schedulers are themselves parallel programs, complicating the parallel programming problem further. In this paper, we present a flexible and efficient approach for specifying and synthesizing scheduling policies for irregular algorithms. We develop a simple compositional specification language and show how it can concisely encode scheduling policies in the literature. Then, we show how to synthesize efficient parallel schedulers from these specifications. 
We evaluate our approach for five irregular algorithms on three multicore architectures and show that (1) the performance of some algorithms can improve by orders of magnitude with the right scheduling policy, and (2) for the same policy, the overheads of our synthesized schedulers are comparable to those of fixed-function schedulers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "ASPLOS '12 conference proceedings.", } @Article{Nobari:2012:SPM, author = "Sadegh Nobari and Thanh-Tung Cao and Panagiotis Karras and St{\'e}phane Bressan", title = "Scalable parallel minimum spanning forest computation", journal = j-SIGPLAN, volume = "47", number = "8", pages = "205--214", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145842", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "The proliferation of data in graph form calls for the development of scalable graph algorithms that exploit parallel processing environments. One such problem is the computation of a graph's minimum spanning forest (MSF). Past research has proposed several parallel algorithms for this problem, yet none of them scales to large, high-density graphs. In this paper we propose a novel, scalable, parallel MSF algorithm for undirected weighted graphs. Our algorithm leverages Prim's algorithm in a parallel fashion, concurrently expanding several subsets of the computed MSF. Our effort focuses on minimizing the communication among different processors without constraining the local growth of a processor's computed subtree. In effect, we achieve a scalability that previous approaches lacked. 
We implement our algorithm in CUDA, running on a GPU and study its performance using real and synthetic, sparse as well as dense, structured and unstructured graph data. Our experimental study demonstrates that our algorithm outperforms the previous state-of-the-art GPU-based MSF algorithm, while being several orders of magnitude faster than sequential CPU-based algorithms.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", } @Article{Notz:2012:GBS, author = "Patrick K. Notz and Roger P. Pawlowski and James C. Sutherland", title = "Graph-Based Software Design for Managing Complexity and Enabling Concurrency in Multiphysics {PDE} Software", journal = j-TOMS, volume = "39", number = "1", pages = "1:1--1:21", month = nov, year = "2012", CODEN = "ACMSCU", DOI = "https://doi.org/10.1145/2382585.2382586", ISSN = "0098-3500 (print), 1557-7295 (electronic)", ISSN-L = "0098-3500", bibdate = "Thu Dec 6 07:36:30 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/toms.bib", abstract = "Multiphysics simulation software is plagued by complexity stemming from nonlinearly coupled systems of Partial Differential Equations (PDEs). Such software typically supports many models, which may require different transport equations, constitutive laws, and equations of state. Strong coupling and a multiplicity of models leads to complex algorithms (i.e., the properly ordered sequence of steps to assemble a discretized set of coupled PDEs) and rigid software. This work presents a design strategy that shifts focus away from high-level algorithmic concerns to low-level data dependencies. Mathematical expressions are represented as software objects that directly expose data dependencies. The entire system of expressions forms a directed acyclic graph and the high-level assembly algorithm is generated automatically through standard graph algorithms. 
This approach makes problems with complex dependencies entirely tractable, and removes virtually all logic from the algorithm itself. Changes are highly localized, allowing developers to implement models without detailed understanding of any algorithms (i.e., the overall assembly process). Furthermore, this approach complements existing MPI-based frameworks and can be implemented within them easily. Finally, this approach enables algorithmic parallelization via threads. By exposing dependencies in the algorithm explicitly, thread-based parallelism is implemented through algorithm decomposition, providing a basis for exploiting parallelism independent from domain decomposition approaches.", acknowledgement = ack-nhfb, articleno = "1", fjournal = "ACM Transactions on Mathematical Software (TOMS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J782", } @InProceedings{Nukada:2012:SMG, author = "Akira Nukada and Kento Sato and Satoshi Matsuoka", title = "Scalable multi-{GPU} {$3$-D} {FFT} for {TSUBAME 2.0} supercomputer", crossref = "Hollingsworth:2012:SPI", pages = "44:1--44:??", year = "2012", bibdate = "Thu Nov 15 07:38:35 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/supercomputing2012.bib", URL = "http://conferences.computer.org/sc/2012/papers/1000a044.pdf", abstract = "For scalable 3-D FFT computation using multiple GPUs, efficient all-to-all communication between GPUs is the most important factor in good performance. Implementations with point-to-point MPI library functions and CUDA memory copy APIs typically exhibit very large overheads especially for small message sizes in all-to-all communications between many nodes. We propose several schemes to minimize the overheads, including employment of lower-level API of InfiniBand to effectively overlap intra- and inter-node communication, as well as auto-tuning strategies to control scheduling and determine rail assignments. 
As a result we achieve very good strong scalability as well as good performance, up to 4.8TFLOPS using 256 nodes of TSUBAME 2.0 Supercomputer (768 GPUs) in double precision.", acknowledgement = ack-nhfb, articleno = "44", } @Article{OBroin:2012:OIS, author = "Cathal {{\'O} Broin} and L. A. A. Nikolopoulos", title = "An {OpenCL} implementation for the solution of the time-dependent {Schr{\"o}dinger} equation on {GPUs} and {CPUs}", journal = j-COMP-PHYS-COMM, volume = "183", number = "10", pages = "2071--2080", month = oct, year = "2012", CODEN = "CPHCBZ", DOI = "https://doi.org/10.1016/j.cpc.2012.05.009", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Thu Jun 28 15:53:26 MDT 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465512001774", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Oh:2012:MOO, author = "Kwang Jin Oh and Ji Hoon Kang and Hun Joo Myung", title = "{mm\_par2.0}: An object-oriented molecular dynamics simulation program parallelized using a hierarchical scheme with {MPI} and {OPENMP}", journal = j-COMP-PHYS-COMM, volume = "183", number = "2", pages = "440--441", month = feb, year = "2012", CODEN = "CPHCBZ", DOI = "https://doi.org/10.1016/j.cpc.2011.08.023", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Sat Feb 11 10:11:01 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465511003407", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Oliveira:2012:CCO, author = "Rafael Sachetto Oliveira 
and Bernardo Martins Rocha and Ronan Mendon{\c{c}}a Amorim", title = "Comparing {CUDA}, {OpenCL} and {OpenGL} Implementations of the Cardiac Monodomain Equations", journal = j-LECT-NOTES-COMP-SCI, volume = "7204", pages = "111--120", year = "2012", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-642-31500-8_12", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Wed Dec 19 15:26:14 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs2012c.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/chapter/10.1007/978-3-642-31500-8_12/", acknowledgement = ack-nhfb, book-DOI = "https://doi.org/10.1007/978-3-642-31500-8", book-URL = "http://www.springerlink.com/content/978-3-642-31500-8", fjournal = "Lecture Notes in Computer Science", } @InProceedings{Olivier:2012:CMW, author = "Stephen L. Olivier and Bronis R. de Supinski and Martin Schulz and Jan F. Prins", title = "Characterizing and mitigating work time inflation in task parallel programs", crossref = "Hollingsworth:2012:SPI", pages = "65:1--65:??", year = "2012", bibdate = "Thu Nov 15 07:38:35 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/supercomputing2012.bib", URL = "http://conferences.computer.org/sc/2012/papers/1000a066.pdf", abstract = "Task parallelism raises the level of abstraction in shared memory parallel programming to simplify the development of complex applications. However, task parallel applications can exhibit poor performance due to thread idleness, scheduling overheads, and work time inflation --- additional time spent by threads in a multithreaded computation beyond the time required to perform the same work in a sequential computation. We identify the contributions of each factor to lost efficiency in various task parallel OpenMP applications and diagnose the causes of work time inflation in those applications. 
Increased data access latency can cause significant work time inflation in NUMA systems. Our locality framework for task parallel OpenMP programs mitigates this cause of work time inflation. Our extensions to the Qthreads library demonstrate that locality-aware scheduling can improve performance up to 3X compared to the Intel OpenMP task scheduler.", acknowledgement = ack-nhfb, articleno = "65", } @Article{Olivier:2012:OTS, author = "Stephen L. Olivier and Allan K. Porterfield and Kyle B. Wheeler and Michael Spiegel and Jan F. Prins", title = "{OpenMP} task scheduling strategies for multicore {NUMA} systems", journal = j-IJHPCA, volume = "26", number = "2", pages = "110--124", month = may, year = "2012", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342011434065", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Thu Nov 8 11:31:13 MST 2012", bibsource = "http://hpc.sagepub.com/content/26/2.toc; https://www.math.utah.edu/pub/tex/bib/ijsa.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://hpc.sagepub.com/content/26/2/110.full.pdf+html", acknowledgement = ack-nhfb, fjournal = "International Journal of High Performance Computing Applications", journal-URL = "http://hpc.sagepub.com/content/by/year", onlinedate = "February 7, 2012", } @Article{Perla:2012:PAH, author = "Francesca Perla and Paolo Zanetti", title = "Performance Analysis of an Hybrid {MPI\slash OpenMP} {ALM} Software for Life Insurance Policies on Multi-core Architectures", journal = j-LECT-NOTES-COMP-SCI, volume = "7312", pages = "250--253", year = "2012", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-642-30961-8_19", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Wed Dec 19 15:19:49 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs2012e.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/chapter/10.1007/978-3-642-30961-8_19/", acknowledgement = 
ack-nhfb,
  book-DOI =     "https://doi.org/10.1007/978-3-642-30961-8",
  book-URL =     "http://www.springerlink.com/content/978-3-642-30961-8",
  fjournal =     "Lecture Notes in Computer Science",
}

@InProceedings{Preissl:2012:CSS,
  author =       "Robert Preissl and Theodore M. Wong and Pallab Datta and Myron Flickner and Raghavendra Singh and Steven K. Esser and William P. Risk and Horst D. Simon and Dharmendra S. Modha",
  title =        "{Compass}: a scalable simulator for an architecture for cognitive computing",
  crossref =     "Hollingsworth:2012:SPI",
  pages =        "54:1--54:??",
  year =         "2012",
  bibdate =      "Thu Nov 15 07:38:35 MST 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/supercomputing2012.bib",
  URL =          "http://conferences.computer.org/sc/2012/papers/1000a085.pdf",
  abstract =     "Inspired by the function, power, and volume of the organic brain, we are developing TrueNorth, a novel modular, non-von Neumann, ultra-low power, compact architecture. TrueNorth consists of a scalable network of neurosynaptic cores, with each core containing neurons, dendrites, synapses, and axons. To set sail for TrueNorth, we developed Compass, a multi-threaded, massively parallel functional simulator and a parallel compiler that maps a network of long-distance pathways in the macaque monkey brain to TrueNorth. We demonstrate near-perfect weak scaling on a 16 rack IBM\reg{} Blue Gene\reg{}/Q (262144 CPUs, 256 TB memory), achieving an unprecedented scale of 256 million neurosynaptic cores containing 65 billion neurons and 16 trillion synapses running only 388X slower than real time with an average spiking rate of 8.1 Hz. By using emerging PGAS communication primitives, we also demonstrate 2X better real-time performance over MPI primitives on a 4 rack Blue Gene/P (16384 CPUs, 16 TB memory).",
  acknowledgement = ack-nhfb,
  articleno =    "54",
}

@Article{Puzniakowski:2012:TOI,
  author =       "Tadeusz Pu{\'z}niakowski and Marek A.
Bednarczyk",
  title =        "Towards an {OpenCL} Implementation of `Genetic Algorithms' on {GPUs}",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "7053",
  pages =        "190--203",
  year =         "2012",
  CODEN =        "LNCSD9",
  DOI =          "https://doi.org/10.1007/978-3-642-25261-7_15",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Wed Dec 19 15:23:16 MST 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/lncs2012a.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/chapter/10.1007/978-3-642-25261-7_15/",
  acknowledgement = ack-nhfb,
  book-DOI =     "https://doi.org/10.1007/978-3-642-25261-7",
  book-URL =     "http://www.springerlink.com/content/978-3-642-25261-7",
  fjournal =     "Lecture Notes in Computer Science",
}

@Article{Qiu:2012:PWM,
  author =       "Judy Qiu and Seung-Hee Bae",
  title =        "Performance of {Windows} multicore systems on threading and {MPI}",
  journal =      j-CCPE,
  volume =       "24",
  number =       "1",
  pages =        "14--28",
  month =        jan,
  year =         "2012",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.1762",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Mon Jan 16 12:11:17 MST 2012",
  bibsource =    "http://www.interscience.wiley.com/jpages/1532-0626; https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Prac\-tice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
  onlinedate =   "23 May 2011",
}

@InProceedings{Rietmann:2012:FAS,
  author =       "Max Rietmann and Peter Messmer and Tarje Nissen-Meyer and Daniel Peter and Piero Basini and Dimitri Komatitsch and Olaf Schenk and Jeroen Tromp and Lapo Boschi and Domenico Giardini",
  title =        "Forward and adjoint simulations of seismic wave propagation on emerging large-scale {GPU} architectures",
  crossref =     "Hollingsworth:2012:SPI",
  pages =        "38:1--38:??",
  year =         "2012",
  bibdate =      "Thu Nov 15 07:38:35 MST 2012",
  bibsource =
"https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/supercomputing2012.bib",
  URL =          "http://conferences.computer.org/sc/2012/papers/1000a104.pdf",
  abstract =     "Computational seismology is an area of wide sociological and economic impact, ranging from earthquake risk assessment to subsurface imaging and oil and gas exploration. At the core of these simulations is the modeling of wave propagation in a complex medium. Here we report on the extension of the high-order finite-element seismic wave simulation package SPECFEM3D to support the largest scale hybrid and homogeneous supercomputers. Starting from an existing highly tuned MPI code, we migrated to a CUDA version. In order to be of immediate impact to the science mission of computational seismologists, we had to port the entire production package, rather than just individual kernels. One of the challenges in parallelizing finite element codes is the potential for race conditions during the assembly phase. We therefore investigated different methods such as mesh coloring or atomic updates on the GPU. In order to achieve strong scaling, we needed to ensure good overlap of data motion at all levels, including internode and host-accelerator transfers. Finally we carefully tuned the GPU implementation. The new MPI/CUDA solver exhibits excellent scalability and achieves speedup on a node-to-node basis over the carefully tuned equivalent multi-core MPI solver.
To demonstrate the performance of both the forward and adjoint functionality, we present two case studies run on the Cray XE6 CPU and Cray XK6 GPU architectures up to 896 nodes: (1) focusing on most commonly used forward simulations, we simulate seismic wave propagation generated by earthquakes in Turkey, and (2) testing the most complex seismic inversion type of the package, we use ambient seismic noise to image 3-D crust and mantle structure beneath western Europe.",
  acknowledgement = ack-nhfb,
  articleno =    "38",
}

@Article{Royuela:2012:ASO,
  author =       "Sara Royuela and Alejandro Duran and Chunhua Liao and Daniel J. Quinlan",
  title =        "Auto-scoping for {OpenMP} Tasks",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "7312",
  pages =        "29--43",
  year =         "2012",
  CODEN =        "LNCSD9",
  DOI =          "https://doi.org/10.1007/978-3-642-30961-8_3",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Wed Dec 19 15:19:49 MST 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/lncs2012e.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/chapter/10.1007/978-3-642-30961-8_3/",
  acknowledgement = ack-nhfb,
  book-DOI =     "https://doi.org/10.1007/978-3-642-30961-8",
  book-URL =     "http://www.springerlink.com/content/978-3-642-30961-8",
  fjournal =     "Lecture Notes in Computer Science",
}

@Article{Rubio-Largo:2012:UMO,
  author =       "{\'A}lvaro Rubio-Largo and Miguel A. Vega-Rodr{\'\i}guez and Juan A.
G{\'o}mez-Pulido",
  title =        "Using a Multiobjective {OpenMP+MPI DE} for the Static {RWA} Problem",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "6927",
  pages =        "224--231",
  year =         "2012",
  CODEN =        "LNCSD9",
  DOI =          "https://doi.org/10.1007/978-3-642-27549-4_29",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Mon Dec 24 07:13:54 MST 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/lncs2012a.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/content/pdf/10.1007/978-3-642-27549-4_29",
  acknowledgement = ack-nhfb,
  book-DOI =     "https://doi.org/10.1007/978-3-642-27549-4",
  book-URL =     "http://www.springerlink.com/content/978-3-642-27549-4",
  fjournal =     "Lecture Notes in Computer Science",
}

@Article{Sabne:2012:ECO,
  author =       "Amit Sabne and Putt Sakdhnagool and Rudolf Eigenmann",
  title =        "Effects of Compiler Optimizations in {OpenMP} to {CUDA} Translation",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "7312",
  pages =        "169--181",
  year =         "2012",
  CODEN =        "LNCSD9",
  DOI =          "https://doi.org/10.1007/978-3-642-30961-8_13",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Wed Dec 19 15:19:49 MST 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/lncs2012e.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/chapter/10.1007/978-3-642-30961-8_13/",
  acknowledgement = ack-nhfb,
  book-DOI =     "https://doi.org/10.1007/978-3-642-30961-8",
  book-URL =     "http://www.springerlink.com/content/978-3-642-30961-8",
  fjournal =     "Lecture Notes in Computer Science",
}

@Article{Samadi:2012:AIA,
  author =       "Mehrzad Samadi and Amir Hormati and Mojtaba Mehrara and Janghaeng Lee and Scott Mahlke",
  title =        "Adaptive input-aware compilation for graphics engines",
  journal =      j-SIGPLAN,
  volume =       "47",
  number =       "6",
  pages =        "13--22",
  month =        jun,
  year =         "2012",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2345156.2254067",
  ISSN =         "0362-1340 (print), 1523-2867 (print),
1558-1160 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Mon Aug 6 16:31:49 MDT 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  note =         "PLDI '12 proceedings.",
  abstract =     "While graphics processing units (GPUs) provide low-cost and efficient platforms for accelerating high performance computations, the tedious process of performance tuning required to optimize applications is an obstacle to wider adoption of GPUs. In addition to the programmability challenges posed by GPU's complex memory hierarchy and parallelism model, a well-known application design problem is target portability across different GPUs. However, even for a single GPU target, changing a program's input characteristics can make an already-optimized implementation of a program perform poorly. In this work, we propose Adaptic, an adaptive input-aware compilation system to tackle this important, yet overlooked, input portability problem. Using this system, programmers develop their applications in a high-level streaming language and let Adaptic undertake the difficult task of input portable optimizations and code generation. Several input-aware optimizations are introduced to make efficient use of the memory hierarchy and customize thread composition. At runtime, a properly optimized version of the application is executed based on the actual program input. We perform a head-to-head comparison between the Adaptic generated and hand-optimized CUDA programs. The results show that Adaptic is capable of generating codes that can perform on par with their hand-optimized counterparts over certain input ranges and outperform them when the input falls out of the hand-optimized programs' ``comfort zone''.
Furthermore, we show that input-aware results are sustainable across different GPU targets making it possible to write and optimize applications once and run them anywhere.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
}

@Article{Santos:2012:ICC,
  author =       "Bruno F. L. Santos and Hendrik T. Macedo",
  title =        "Improving {CUDA{\TM} C\slash C++} encoding readability to foster parallel application development",
  journal =      j-SIGSOFT,
  volume =       "37",
  number =       "1",
  pages =        "1--5",
  month =        jan,
  year =         "2012",
  CODEN =        "SFENDP",
  DOI =          "https://doi.org/10.1145/2088883.2088897",
  ISSN =         "0163-5948 (print), 1943-5843 (electronic)",
  ISSN-L =       "0163-5948",
  bibdate =      "Wed Aug 1 17:16:09 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigsoft2010.bib",
  abstract =     "Graphical Processing Units (GPUs) have recently been used to enable parallel application development. The most prominent initiative has been provided by NVIDIA{\TM} with the so-called CUDA{\TM} architecture, designed to GeForce{\TM} graphic cards. However, even with CUDA C-like programming language, parallel codification remains somewhat awkward if compared to sequential codification. The programmer still has to deal with low-level hardware details such as generation and synchronization of threads and GPU tracks and sectors. In this paper, we propose a programmer-friendly interface for CUDA-C programming, in such a way that most hardware details are hidden from the programmer.
We show how code readability is improved without undermining parallel execution performance.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGSOFT Software Engineering Notes",
  journal-URL =  "https://dl.acm.org/citation.cfm?id=J728",
}

@Article{Satake:2012:OGA,
  author =       "Shin-ichi Satake and Hajime Yoshimori and Takayuki Suzuki",
  title =        "Optimizations of a {GPU} accelerated heat conduction equation by a programming of {CUDA Fortran} from an analysis of a {PTX} file",
  journal =      j-COMP-PHYS-COMM,
  volume =       "183",
  number =       "11",
  pages =        "2376--2385",
  month =        nov,
  year =         "2012",
  CODEN =        "CPHCBZ",
  DOI =          "https://doi.org/10.1016/j.cpc.2012.06.005",
  ISSN =         "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L =       "0010-4655",
  bibdate =      "Fri Jul 27 07:00:54 MDT 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0010465512002068",
  acknowledgement = ack-nhfb,
  fjournal =     "Computer Physics Communications",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00104655",
}

@InProceedings{Schindewolf:2012:WSA,
  author =       "Martin Schindewolf and Barna Bihari and John Gyllenhaal and Martin Schulz and Amy Wang and Wolfgang Karl",
  title =        "What scientific applications can benefit from hardware transactional memory?",
  crossref =     "Hollingsworth:2012:SPI",
  pages =        "90:1--90:??",
  year =         "2012",
  bibdate =      "Thu Nov 15 07:38:35 MST 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/supercomputing2012.bib",
  URL =          "http://conferences.computer.org/sc/2012/papers/1000a073.pdf",
  abstract =     "Achieving efficient and correct synchronization of multiple threads is a difficult and error-prone task at small scale and, as we march towards extreme scale computing, will be even more challenging when the resulting application is supposed to utilize millions of cores efficiently.
Transactional Memory (TM) is a promising technique to ease the burden on the programmer, but only recently has become available on commercial hardware in the new Blue Gene/Q system and hence the real benefit for realistic applications has not been studied yet. This paper presents the first performance results of TM embedded into OpenMP on a prototype system of BG/Q and characterizes code properties that will likely lead to benefits when augmented with TM primitives. We first study the influence of thread count, environment variables and memory layout on TM performance and identify code properties that will yield performance gains with TM. Second, we evaluate the combination of OpenMP with multiple synchronization primitives on top of MPI to determine suitable task to thread ratios per node. Finally, we condense our findings into a set of best practices. These are applied to a Monte Carlo Benchmark and a Smoothed Particle Hydrodynamics method. In both cases an optimized TM version, executed with 64 threads on one node, outperforms a simple TM implementation.
MCB with optimized TM yields a speedup of 27.45 over baseline.",
  acknowledgement = ack-nhfb,
  articleno =    "90",
}

@Article{Schmidl:2012:PAT,
  author =       "Dirk Schmidl and Peter Philippen and Daniel Lorenz and Christian R{\"o}ssel and Markus Geimer",
  title =        "Performance Analysis Techniques for Task-Based {OpenMP} Applications",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "7312",
  pages =        "196--209",
  year =         "2012",
  CODEN =        "LNCSD9",
  DOI =          "https://doi.org/10.1007/978-3-642-30961-8_15",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Wed Dec 19 15:19:49 MST 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/lncs2012e.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/chapter/10.1007/978-3-642-30961-8_15/",
  acknowledgement = ack-nhfb,
  book-DOI =     "https://doi.org/10.1007/978-3-642-30961-8",
  book-URL =     "http://www.springerlink.com/content/978-3-642-30961-8",
  fjournal =     "Lecture Notes in Computer Science",
}

@Article{Schneider:2012:MAC,
  author =       "Timo Schneider and Robert Gerstenberger and Torsten Hoefler",
  title =        "Micro-applications for Communication Data Access Patterns and {MPI} Datatypes",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "7490",
  pages =        "121--131",
  year =         "2012",
  CODEN =        "LNCSD9",
  DOI =          "https://doi.org/10.1007/978-3-642-33518-1_17",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Wed Dec 19 15:23:42 MST 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/lncs2012h.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/chapter/10.1007/978-3-642-33518-1_17/",
  acknowledgement = ack-nhfb,
  book-DOI =     "https://doi.org/10.1007/978-3-642-33518-1",
  book-URL =     "http://www.springerlink.com/content/978-3-642-33518-1",
  fjournal =     "Lecture Notes in Computer Science",
}

@Article{Sehrish:2012:RFS,
  author =       "Saba Sehrish and Jun Wang",
  title =        "{Reduced Function Set Abstraction (RFSA)} for {MPI-IO}",
  journal =      j-J-SUPERCOMPUTING,
volume =         "59",
  number =       "1",
  pages =        "131--146",
  month =        jan,
  year =         "2012",
  CODEN =        "JOSUED",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Tue Dec 13 15:25:33 MST 2011",
  bibsource =    "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=59&issue=1; https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=59&issue=1&spage=131",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
}

@Article{Shan:2012:OAA,
  author =       "Hongzhang Shan and Erich Strohmaier and James Amundson and Eric G. Stern",
  title =        "Optimizing the Advanced Accelerator Simulation Framework {Synergia} Using {OpenMP}",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "7312",
  pages =        "140--153",
  year =         "2012",
  CODEN =        "LNCSD9",
  DOI =          "https://doi.org/10.1007/978-3-642-30961-8_11",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Wed Dec 19 15:19:49 MST 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/lncs2012e.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/chapter/10.1007/978-3-642-30961-8_11/",
  acknowledgement = ack-nhfb,
  book-DOI =     "https://doi.org/10.1007/978-3-642-30961-8",
  book-URL =     "http://www.springerlink.com/content/978-3-642-30961-8",
  fjournal =     "Lecture Notes in Computer Science",
}

@Article{Shan:2012:PEH,
  author =       "Hongzhang Shan and Nicholas J.
Wright and John Shalf and Katherine Yelick and Marcus Wagner and Nathan Wichmann",
  title =        "A preliminary evaluation of the hardware acceleration of the {Cray Gemini} interconnect for {PGAS} languages and comparison with {MPI}",
  journal =      j-SIGMETRICS,
  volume =       "40",
  number =       "2",
  pages =        "92--98",
  month =        sep,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2381056.2381077",
  ISSN =         "0163-5999 (print), 1557-9484 (electronic)",
  ISSN-L =       "0163-5999",
  bibdate =      "Fri Nov 9 11:06:40 MST 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigmetrics.bib",
  abstract =     "The Gemini interconnect on the Cray XE6 platform provides for lightweight remote direct memory access (RDMA) between nodes, which is useful for implementing partitioned global address space (PGAS) languages like UPC and Co-Array Fortran. In this paper, we perform a study of Gemini performance using a set of communication microbenchmarks and compare the performance of one-sided communication in PGAS languages with two-sided MPI. Our results demonstrate the performance benefits of the PGAS model on Gemini hardware, showing in what circumstances and by how much one-sided communication outperforms two-sided in terms of messaging rate, aggregate bandwidth, and computation and communication overlap capability. For example, for 8-byte and 2KB messages the one-sided messaging rate is 5 and 10 times greater respectively than the two-sided one.
The study also reveals important information about how to optimize one-sided Gemini communication.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGMETRICS Performance Evaluation Review",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J618",
}

@Article{Sharma:2012:SRP,
  author =       "Subodh Sharma and Ganesh Gopalakrishnan",
  title =        "A Sound Reduction of Persistent-Sets for Deadlock Detection in {MPI} Applications",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "7498",
  pages =        "194--209",
  year =         "2012",
  CODEN =        "LNCSD9",
  DOI =          "https://doi.org/10.1007/978-3-642-33296-8_15",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Wed Dec 19 15:23:52 MST 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/lncs2012h.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/chapter/10.1007/978-3-642-33296-8_15/",
  acknowledgement = ack-nhfb,
  book-DOI =     "https://doi.org/10.1007/978-3-642-33296-8",
  book-URL =     "http://www.springerlink.com/content/978-3-642-33296-8",
  fjournal =     "Lecture Notes in Computer Science",
}

@Article{Shi:2012:VGA,
  author =       "Lin Shi and Hao Chen and Jianhua Sun and Kenli Li",
  title =        "{vCUDA}: {GPU}-Accelerated High-Performance Computing in Virtual Machines",
  journal =      j-IEEE-TRANS-COMPUT,
  volume =       "61",
  number =       "6",
  pages =        "804--816",
  month =        jun,
  year =         "2012",
  CODEN =        "ITCOB4",
  DOI =          "https://doi.org/10.1109/TC.2011.112",
  ISSN =         "0018-9340 (print), 1557-9956 (electronic)",
  ISSN-L =       "0018-9340",
  bibdate =      "Fri Jul 27 08:32:31 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/ieeetranscomput2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/super.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Computers",
  journal-URL =  "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=12",
}

@InProceedings{Speck:2012:MST,
  author =       "R. Speck and D. Ruprecht and R. Krause and M. Emmett and M. Minion and M. Winkel and P.
Gibbon",
  title =        "A massively space-time parallel {$N$}-body solver",
  crossref =     "Hollingsworth:2012:SPI",
  pages =        "92:1--92:??",
  year =         "2012",
  bibdate =      "Thu Nov 15 07:38:35 MST 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/supercomputing2012.bib",
  URL =          "http://conferences.computer.org/sc/2012/papers/1000a083.pdf",
  abstract =     "We present a novel space-time parallel version of the Barnes--Hut tree code pepc using pfasst, the Parallel Full Approximation Scheme in Space and Time. The naive use of increasingly more processors for a fixed-size N-body problem is prone to saturate as soon as the number of unknowns per core becomes too small. To overcome this intrinsic strong-scaling limit, we introduce temporal parallelism on top of pepc's existing hybrid MPI/PThreads spatial decomposition. Here, we use pfasst which is based on a combination of the iterations of the parallel-in-time algorithm parareal with the sweeps of spectral deferred correction (SDC) schemes. By combining these sweeps with multiple space-time discretization levels, pfasst relaxes the theoretical bound on parallel efficiency in parareal.
We present results from runs on up to 262,144 cores on the IBM Blue Gene/P installation JUGENE, demonstrating that the space-time parallel code provides speedup beyond the saturation of the purely space-parallel approach.",
  acknowledgement = ack-nhfb,
  articleno =    "92",
}

@Article{Steinberger:2012:SDS,
  author =       "Markus Steinberger and Bernhard Kainz and Bernhard Kerbl and Stefan Hauswiesner and Michael Kenzel and Dieter Schmalstieg",
  title =        "{Softshell}: dynamic scheduling on {GPUs}",
  journal =      j-TOG,
  volume =       "31",
  number =       "6",
  pages =        "161:1--161:??",
  month =        nov,
  year =         "2012",
  CODEN =        "ATGRDF",
  DOI =          "https://doi.org/10.1145/2366145.2366180",
  ISSN =         "0730-0301 (print), 1557-7368 (electronic)",
  ISSN-L =       "0730-0301",
  bibdate =      "Thu Nov 15 16:10:28 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tog/; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/tog.bib",
  abstract =     "In this paper we present Softshell, a novel execution model for devices composed of multiple processing cores operating in a single instruction, multiple data fashion, such as graphics processing units (GPUs). The Softshell model is intuitive and more flexible than the kernel-based adaption of the stream processing model, which is currently the dominant model for general purpose GPU computation. Using the Softshell model, algorithms with a relatively low local degree of parallelism can execute efficiently on massively parallel architectures. Softshell has the following distinct advantages: (1) work can be dynamically issued directly on the device, eliminating the need for synchronization with an external source, i.e., the CPU; (2) its three-tier dynamic scheduler supports arbitrary scheduling strategies, including dynamic priorities and real-time scheduling; and (3) the user can influence, pause, and cancel work already submitted for parallel execution.
The Softshell processing model thus brings capabilities to GPU architectures that were previously only known from operating-system designs and reserved for CPU programming. As a proof of our claims, we present a publicly available implementation of the Softshell processing model realized on top of CUDA. The benchmarks of this implementation demonstrate that our processing model is easy to use and also performs substantially better than the state-of-the-art kernel-based processing model for problems that have been difficult to parallelize in the past.",
  acknowledgement = ack-nhfb,
  articleno =    "161",
  fjournal =     "ACM Transactions on Graphics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J778",
}

@Article{Strzodka:2012:DLO,
  author =       "Robert Strzodka",
  title =        "Data layout optimization for multi-valued containers in {OpenCL}",
  journal =      j-J-PAR-DIST-COMP,
  volume =       "72",
  number =       "9",
  pages =        "1073--1082",
  month =        sep,
  year =         "2012",
  CODEN =        "JPDCER",
  DOI =          "https://doi.org/10.1016/j.jpdc.2011.10.012",
  ISSN =         "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L =       "0743-7315",
  bibdate =      "Fri Jul 27 06:43:44 MDT 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sciencedirect.com/science/journal/07437315",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0743731511002115",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Parallel and Distributed Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/07437315",
}

@Article{Su:2012:CPB,
  author =       "ChunYi Su and Dong Li and Dimitrios S. Nikolopoulos and Matthew Grove and Kirk Cameron and Bronis R.
de Supinski",
  title =        "Critical path-based thread placement for {NUMA} systems",
  journal =      j-SIGMETRICS,
  volume =       "40",
  number =       "2",
  pages =        "106--112",
  month =        sep,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2381056.2381079",
  ISSN =         "0163-5999 (print), 1557-9484 (electronic)",
  ISSN-L =       "0163-5999",
  bibdate =      "Fri Nov 9 11:06:40 MST 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigmetrics.bib",
  abstract =     "Multicore multiprocessors use a Non Uniform Memory Architecture (NUMA) to improve their scalability. However, NUMA introduces performance penalties due to remote memory accesses. Without efficiently managing data layout and thread mapping to cores, scientific applications may suffer performance loss, even if they are optimized for NUMA. In this paper, we present algorithms and a runtime system that optimize the execution of OpenMP applications on NUMA architectures. By collecting information from hardware counters, the runtime system directs thread placement and reduces performance penalties by minimizing the critical path of OpenMP parallel regions. The runtime system uses a scalable algorithm that derives placement decisions with negligible overhead. We evaluate our algorithms and the runtime system with four NPB applications implemented in OpenMP. On average the algorithms achieve between 8.13\% and 25.68\% performance improvement, compared to the default Linux thread placement scheme. The algorithms miss the optimal thread placement in only 8.9\% of the cases.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGMETRICS Performance Evaluation Review",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J618",
}

@InProceedings{Subramoni:2012:DSI,
  author =       "H. Subramoni and S. Potluri and K. Kandalla and B. Barth and J. Vienne and J. Keasler and K. Tomko and K. Schulz and A. Moody and D. K.
Panda",
  title =        "Design of a scalable {InfiniBand} topology service to enable network-topology-aware placement of processes",
  crossref =     "Hollingsworth:2012:SPI",
  pages =        "70:1--70:??",
  year =         "2012",
  bibdate =      "Thu Nov 15 07:38:35 MST 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/supercomputing2012.bib",
  URL =          "http://conferences.computer.org/sc/2012/papers/1000a076.pdf",
  abstract =     "Over the last decade, InfiniBand has become an increasingly popular interconnect for deploying modern super-computing systems. However, there exists no detection service that can discover the underlying network topology in a scalable manner and expose this information to runtime libraries and users of the high performance computing systems in a convenient way. In this paper, we design a novel and scalable method to detect the InfiniBand network topology by using Neighbor-Joining techniques (NJ). To the best of our knowledge, this is the first instance where the neighbor joining algorithm has been applied to solve the problem of detecting InfiniBand network topology. We also design a network-topology-aware MPI library that takes advantage of the network topology service.
The library places processes taking part in the MPI job in a network-topology-aware manner with the dual aim of increasing intra-node communication and reducing the long distance inter-node communication across the InfiniBand fabric.",
  acknowledgement = ack-nhfb,
  articleno =    "70",
}

@Article{Sumimoto:2012:MCL,
  author =       "Shinji Sumimoto",
  title =        "The {MPI Communication Library} for the {K} Computer: Its Design and Implementation",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "7490",
  pages =        "11--11",
  year =         "2012",
  CODEN =        "LNCSD9",
  DOI =          "https://doi.org/10.1007/978-3-642-33518-1_3",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Wed Dec 19 15:23:42 MST 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/lncs2012h.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/accesspage/chapter/10.1007/978-3-642-33518-1_3",
  acknowledgement = ack-nhfb,
  book-DOI =     "https://doi.org/10.1007/978-3-642-33518-1",
  book-URL =     "http://www.springerlink.com/content/978-3-642-33518-1",
  fjournal =     "Lecture Notes in Computer Science",
}

@Article{Tahan:2012:ITC,
  author =       "Oussama Tahan and Mats Brorsson and Mohamed Shawky",
  title =        "Introducing Task Cancellation to {OpenMP}",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "7312",
  pages =        "73--87",
  year =         "2012",
  CODEN =        "LNCSD9",
  DOI =          "https://doi.org/10.1007/978-3-642-30961-8_6",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Wed Dec 19 15:19:49 MST 2012",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/lncs2012e.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/chapter/10.1007/978-3-642-30961-8_6/",
  acknowledgement = ack-nhfb,
  book-DOI =     "https://doi.org/10.1007/978-3-642-30961-8",
  book-URL =     "http://www.springerlink.com/content/978-3-642-30961-8",
  fjournal =     "Lecture Notes in Computer Science",
}

@Article{Tahan:2012:UDT,
  author =       "Oussama Tahan and Mohamed Shawky",
  title =        "Using Dynamic Task
Level Redundancy for {OpenMP} Fault Tolerance", journal = j-LECT-NOTES-COMP-SCI, volume = "7179", pages = "25--36", year = "2012", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-642-28293-5_3", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Wed Dec 19 15:25:42 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs2012b.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/chapter/10.1007/978-3-642-28293-5_3/", acknowledgement = ack-nhfb, book-DOI = "https://doi.org/10.1007/978-3-642-28293-5", book-URL = "http://www.springerlink.com/content/978-3-642-28293-5", fjournal = "Lecture Notes in Computer Science", } @Article{Tao:2012:UGA, author = "Jian Tao and Marek Blazewicz and Steven R. Brandt", title = "Using {GPU}'s to accelerate stencil-based computation kernels for the development of large scale scientific applications on heterogeneous systems", journal = j-SIGPLAN, volume = "47", number = "8", pages = "287--288", month = aug, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2370036.2145857", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Sep 12 12:11:57 MDT 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPOPP '12 conference proceedings.", abstract = "We present CaCUDA --- a GPGPU kernel abstraction and a parallel programming framework for developing highly efficient large scale scientific applications using stencil computations on hybrid CPU/GPU architectures. CaCUDA is built upon the Cactus computational toolkit, an open source problem solving environment designed for scientists and engineers. 
Due to the flexibility and extensibility of the Cactus toolkit, the addition of a GPGPU programming framework required no changes to the Cactus infrastructure, guaranteeing that existing features and modules will continue to work without modification. CaCUDA was tested and benchmarked using a 3D CFD code based on a finite difference discretization of Navier--Stokes equations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", } @Article{Terboven:2012:AOT, author = "Christian Terboven and Dirk Schmidl and Tim Cramer and Dieter an Mey", title = "Assessing {OpenMP} Tasking Implementations on {NUMA} Architectures", journal = j-LECT-NOTES-COMP-SCI, volume = "7312", pages = "182--195", year = "2012", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-642-30961-8_14", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Wed Dec 19 15:19:49 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs2012e.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/chapter/10.1007/978-3-642-30961-8_14/", acknowledgement = ack-nhfb, book-DOI = "https://doi.org/10.1007/978-3-642-30961-8", book-URL = "http://www.springerlink.com/content/978-3-642-30961-8", fjournal = "Lecture Notes in Computer Science", } @Article{Thibault:2012:AIF, author = "Julien C. 
Thibault and Inanc Senocak", title = "Accelerating incompressible flow computations with a {Pthreads--CUDA} implementation on small-footprint multi-{GPU} platforms", journal = j-J-SUPERCOMPUTING, volume = "59", number = "2", pages = "693--719", month = feb, year = "2012", CODEN = "JOSUED", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Fri Apr 6 17:44:43 MDT 2012", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=59&issue=2; https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=59&issue=2&spage=693", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Article{Thoman:2012:AOL, author = "Peter Thoman and Herbert Jordan and Simone Pellegrini and Thomas Fahringer", title = "Automatic {OpenMP} Loop Scheduling: a Combined Compiler and Runtime Approach", journal = j-LECT-NOTES-COMP-SCI, volume = "7312", pages = "88--101", year = "2012", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-642-30961-8_7", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Wed Dec 19 15:19:49 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs2012e.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/chapter/10.1007/978-3-642-30961-8_7/", acknowledgement = ack-nhfb, book-DOI = "https://doi.org/10.1007/978-3-642-30961-8", book-URL = "http://www.springerlink.com/content/978-3-642-30961-8", fjournal = "Lecture Notes in Computer Science", } @InProceedings{Thorson:2012:SUF, author = "Greg Thorson and Michael Woodacre", title = "{SGI UV2}: a fused computation and data analysis machine", crossref = "Hollingsworth:2012:SPI", pages = "105:1--105:??", year = "2012", bibdate = "Thu Nov 15 07:38:35 MST 2012", bibsource = 
"https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/supercomputing2012.bib", URL = "http://conferences.computer.org/sc/2012/papers/1000a080.pdf", abstract = "UV2 is SGI's second generation data fusion system. UV2 was designed to meet the latest challenges facing users in computation and data analysis. Its unique ability to perform both functions on a single platform enables efficient, easy to manage workflows. This platform has a hybrid infrastructure, leveraging the latest Intel\reg{} EP processors providing industry leading computational power. Due to its high bandwidth, extremely low latency NUMALink\reg{}6 (NL6) interconnect, plus vectorized synchronization and data movement, UV2 provides industry leading data intensive capability. It supports a single operating system (OS) image up to 64TB and 4K threads. Multiple OS images can be deployed on a single NL6 fabric, which has a single flat address space up to 8PB and 256K threads. These capabilities allow for extreme performance on a broad range of programming models and languages including OpenMP[1], MPI, UPC[2], CAF[3] and SHMEM. 
The architecture, implementation and performance of UV2 are detailed.", acknowledgement = ack-nhfb, articleno = "105", } @Article{Traff:2012:AUE, author = "Jesper Larsson Tr{\"a}ff", title = "Alternative, uniformly expressive and more scalable interfaces for collective communication in {MPI}", journal = j-PARALLEL-COMPUTING, volume = "38", number = "1--2", pages = "26--36", month = jan # "\slash " # feb, year = "2012", CODEN = "PACOEJ", DOI = "https://doi.org/10.1016/j.parco.2011.10.009", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Sat Feb 4 15:17:36 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819111001402", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Traff:2012:MTM, author = "Jesper Larsson Tr{\"a}ff", title = "{{\tt mpicroscope}}: Towards an {MPI} Benchmark Tool for Performance Guideline Verification", journal = j-LECT-NOTES-COMP-SCI, volume = "7490", pages = "100--109", year = "2012", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-642-33518-1_15", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Wed Dec 19 15:23:42 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs2012h.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/chapter/10.1007/978-3-642-33518-1_15/", acknowledgement = ack-nhfb, book-DOI = "https://doi.org/10.1007/978-3-642-33518-1", book-URL = "http://www.springerlink.com/content/978-3-642-33518-1", fjournal = "Lecture Notes in Computer Science", } @Article{Tsutsui:2012:AMG, author = "Shigeyoshi Tsutsui", title = "{ACO} on Multiple {GPUs} with {CUDA} for Faster Solution of {QAPs}", journal = j-LECT-NOTES-COMP-SCI, volume = "7492", pages = "174--184", year = "2012", CODEN = 
"LNCSD9", DOI = "https://doi.org/10.1007/978-3-642-32964-7_18", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Wed Dec 19 15:23:44 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs2012h.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/chapter/10.1007/978-3-642-32964-7_18/", acknowledgement = ack-nhfb, book-DOI = "https://doi.org/10.1007/978-3-642-32964-7", book-URL = "http://www.springerlink.com/content/978-3-642-32964-7", fjournal = "Lecture Notes in Computer Science", } @Article{Tu:2012:PAO, author = "Bibo Tu and Jianping Fan and Jianfeng Zhan and Xiaofang Zhao", title = "Performance analysis and optimization of {MPI} collective operations on multi-core clusters", journal = j-J-SUPERCOMPUTING, volume = "60", number = "1", pages = "141--162", month = apr, year = "2012", CODEN = "JOSUED", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Fri Apr 6 17:45:24 MDT 2012", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=60&issue=1; https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=60&issue=1&spage=141", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Article{Unat:2012:AFD, author = "Didem Unat and Jun Zhou and Yifeng Cui and Scott B. 
Baden and Xing Cai", title = "Accelerating a {$3$D} Finite-Difference Earthquake Simulation with a {C-to-CUDA} Translator", journal = j-COMPUT-SCI-ENG, volume = "14", number = "3", pages = "48--59", month = may # "\slash " # jun, year = "2012", CODEN = "CSENFA", DOI = "https://doi.org/10.1109/MCSE.2012.44", ISSN = "1521-9615 (print), 1558-366X (electronic)", ISSN-L = "1521-9615", bibdate = "Thu Apr 26 17:01:57 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/computscieng.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Computing in Science and Engineering", journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5992", } @Article{Urena:2012:IMI, author = "Isa{\'\i}as A. Compr{\'e}s Ure{\~n}a and Michael Riepen and Michael Konow and Michael Gerndt", title = "Invasive {MPI} on {Intel}'s Single-Chip Cloud Computer", journal = j-LECT-NOTES-COMP-SCI, volume = "7179", pages = "74--85", year = "2012", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-642-28293-5_7", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Wed Dec 19 15:25:42 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs2012b.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/chapter/10.1007/978-3-642-28293-5_7/", acknowledgement = ack-nhfb, book-DOI = "https://doi.org/10.1007/978-3-642-28293-5", book-URL = "http://www.springerlink.com/content/978-3-642-28293-5", fjournal = "Lecture Notes in Computer Science", } @Article{Wang:2012:OVT, author = "Cheng Wang and Sunita Chandrasekaran and Barbara Chapman", title = "An {OpenMP 3.1} Validation Testsuite", journal = j-LECT-NOTES-COMP-SCI, volume = "7312", pages = "237--249", year = "2012", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-642-30961-8_18", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Wed Dec 19 15:19:49 MST 2012", bibsource =
"https://www.math.utah.edu/pub/tex/bib/lncs2012e.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/chapter/10.1007/978-3-642-30961-8_18/", acknowledgement = ack-nhfb, book-DOI = "https://doi.org/10.1007/978-3-642-30961-8", book-URL = "http://www.springerlink.com/content/978-3-642-30961-8", fjournal = "Lecture Notes in Computer Science", } @Article{Wei:2012:OLL, author = "Zheng Wei and Joseph Jaja", title = "Optimization of Linked List Prefix Computations on Multithreaded {GPUs} Using {CUDA}", journal = j-PARALLEL-PROCESS-LETT, volume = "22", number = "4", pages = "1250012", month = dec, year = "2012", CODEN = "PPLTEE", DOI = "https://doi.org/10.1142/S0129626412500120", ISSN = "0129-6264 (print), 1793-642X (electronic)", ISSN-L = "0129-6264", bibdate = "Sat Jun 22 15:54:17 MDT 2013", bibsource = "http://ejournals.wspc.com.sg/ppl/; https://www.math.utah.edu/pub/tex/bib/parallelprocesslett.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Parallel Processing Letters", journal-URL = "http://www.worldscientific.com/loi/ppl", } @Article{Wu:2012:DPL, author = "Chao-Chin Wu and Chao-Tung Yang and Kuan-Chou Lai and Po-Hsun Chiu", title = "Designing parallel loop self-scheduling schemes using the hybrid {MPI} and {OpenMP} programming model for multi-core grid systems", journal = j-J-SUPERCOMPUTING, volume = "59", number = "1", pages = "42--60", month = jan, year = "2012", CODEN = "JOSUED", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Tue Dec 13 15:25:33 MST 2011", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=59&issue=1; https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=59&issue=1&spage=42", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = 
"http://link.springer.com/journal/11227", } @Article{Wu:2012:PCH, author = "Xingfu Wu and Valerie Taylor", title = "Performance Characteristics of Hybrid {MPI\slash OpenMP} Implementations of {NAS Parallel Benchmarks} {SP} and {BT} on Large-Scale Multicore Clusters", journal = j-COMP-J, volume = "55", number = "2", pages = "154--167", month = feb, year = "2012", CODEN = "CMPJA6", DOI = "https://doi.org/10.1093/comjnl/bxr063", ISSN = "0010-4620 (print), 1460-2067 (electronic)", ISSN-L = "0010-4620", bibdate = "Thu Feb 2 09:12:17 MST 2012", bibsource = "http://comjnl.oxfordjournals.org/content/55/2.toc; https://www.math.utah.edu/pub/tex/bib/compj2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://comjnl.oxfordjournals.org/content/55/2/154.full.pdf+html", acknowledgement = ack-nhfb, fjournal = "Computer Journal", journal-URL = "http://comjnl.oxfordjournals.org/", onlinedate = "July 18, 2011", } @Article{Wu:2012:UHM, author = "Chao-Chin Wu and Lien-Fu Lai and Chao-Tung Yang and Po-Hsun Chiu", title = "Using hybrid {MPI} and {OpenMP} programming to optimize communications in parallel loop self-scheduling schemes for multicore {PC} clusters", journal = j-J-SUPERCOMPUTING, volume = "60", number = "1", pages = "31--61", month = apr, year = "2012", CODEN = "JOSUED", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Fri Apr 6 17:45:24 MDT 2012", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=60&issue=1; https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=60&issue=1&spage=31", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Article{Yoshinaga:2012:DBM, author = "Kazumi Yoshinaga and Yuichi Tsujita and Atsushi Hori and Mikiko Sato and Mitaro Namiki", title = 
"Delegation-Based {MPI} Communications for a Hybrid Parallel Computer with Many-Core Architecture", journal = j-LECT-NOTES-COMP-SCI, volume = "7490", pages = "47--56", year = "2012", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-642-33518-1_10", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Wed Dec 19 15:23:42 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs2012h.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/chapter/10.1007/978-3-642-33518-1_10/", acknowledgement = ack-nhfb, book-DOI = "https://doi.org/10.1007/978-3-642-33518-1", book-URL = "http://www.springerlink.com/content/978-3-642-33518-1", fjournal = "Lecture Notes in Computer Science", } @Article{Yu:2012:SCC, author = "Fang Yu and Shun-Ching Yang and Farn Wang and Guan-Cheng Chen and Che-Chang Chan", title = "Symbolic consistency checking of {OpenMP} parallel programs", journal = j-SIGPLAN, volume = "47", number = "5", pages = "139--148", month = may, year = "2012", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2345141.2248438", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 6 16:31:46 MDT 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "LCTES '12 proceedings.", abstract = "We present a symbolic approach for checking consistency of OpenMP parallel programs. A parallel program is consistent if it yields the same result as its sequential version despite the execution order among threads. We find race conditions of an OpenMP parallel program, construct the formal model of its raced segments under relaxed memory models, and perform guided symbolic simulation to search consistency violations. The simulation terminates when (1) a witness has been found (the program is inconsistent), or (2) all reachable states have been explored (the program is consistent). 
We have developed the tool Pathg by incorporating Omega library to solve race constraints and Red symbolic simulator to perform guided search. We show that Pathg can prove consistency of programs, identify races that modern OpenMP checkers failed to report, and find inconsistency witnesses effectively against benchmarks from the OpenMP Source Code Repository and the NAS Parallel benchmark suite.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", } @Article{Yuan:2012:PCS, author = "Zhiyong Yuan and Weixin Si and Xiangyun Liao and Zhaoliang Duan and Yihua Ding and Jianhui Zhao", title = "Parallel computing of {$3$D} smoking simulation based on {OpenCL} heterogeneous platform", journal = j-J-SUPERCOMPUTING, volume = "61", number = "1", pages = "84--102", month = jul, year = "2012", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-011-0652-y", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Fri Oct 26 07:41:32 MDT 2012", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=61&issue=1; https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=61&issue=1&spage=84", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Article{Zahavi:2012:FTR, author = "Eitan Zahavi", title = "Fat-tree routing and node ordering providing contention free traffic for {MPI} global collectives", journal = j-J-PAR-DIST-COMP, volume = "72", number = "11", pages = "1423--1432", month = nov, year = "2012", CODEN = "JPDCER", DOI = "https://doi.org/10.1016/j.jpdc.2012.01.018", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Wed Sep 12 12:11:36 MDT 2012", bibsource = 
"https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sciencedirect.com/science/journal/07437315", URL = "http://www.sciencedirect.com/science/article/pii/S0743731512000305", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Article{Zhao:2012:ASO, author = "Xin Zhao and Gopalakrishnan Santhanaraman and William Gropp", title = "Adaptive Strategy for One-Sided Communication in {MPICH2}", journal = j-LECT-NOTES-COMP-SCI, volume = "7490", pages = "16--26", year = "2012", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-642-33518-1_7", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Wed Dec 19 15:23:42 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs2012h.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/chapter/10.1007/978-3-642-33518-1_7/", acknowledgement = ack-nhfb, book-DOI = "https://doi.org/10.1007/978-3-642-33518-1", book-URL = "http://www.springerlink.com/content/978-3-642-33518-1", fjournal = "Lecture Notes in Computer Science", } @Article{Zhou:2012:DFD, author = "Xu Zhou and Kai Lu and Xicheng Lu and Xiaoping Wang and Baohua Fan", title = "{dMPI}: Facilitating Debugging of {MPI} Programs via Deterministic Message Passing", journal = j-LECT-NOTES-COMP-SCI, volume = "7513", pages = "172--179", year = "2012", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-642-35606-3_20", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Wed Dec 19 15:24:06 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs2012i.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/chapter/10.1007/978-3-642-35606-3_20/", acknowledgement = ack-nhfb, book-DOI = "https://doi.org/10.1007/978-3-642-35606-3", book-URL = 
"http://www.springerlink.com/content/978-3-642-35606-3", fjournal = "Lecture Notes in Computer Science", } @Article{Zhu:2012:CDS, author = "Ke Zhu and Matthias Butenuth and Pablo d'Angelo", title = "Comparison of Dense Stereo Using {CUDA}", journal = j-LECT-NOTES-COMP-SCI, volume = "6554", pages = "398--410", year = "2012", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-642-35740-4_31", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", bibdate = "Mon Dec 24 08:20:14 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs2012a.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/content/pdf/10.1007/978-3-642-35740-4_31", acknowledgement = ack-nhfb, book-DOI = "https://doi.org/10.1007/978-3-642-35740-4", book-URL = "http://www.springerlink.com/content/978-3-642-35740-4", fjournal = "Lecture Notes in Computer Science", } @Article{Augusto:2013:APG, author = "Douglas A. Augusto and Helio J. C. Barbosa", title = "Accelerated parallel genetic programming tree evaluation with {OpenCL}", journal = j-J-PAR-DIST-COMP, volume = "73", number = "1", pages = "86--100", month = jan, year = "2013", CODEN = "JPDCER", DOI = "https://doi.org/10.1016/j.jpdc.2012.01.012", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Sat Nov 17 07:06:13 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sciencedirect.com/science/journal/07437315", URL = "http://www.sciencedirect.com/science/article/pii/S074373151200024X", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Article{Bach:2013:LQB, author = "Matthias Bach and Volker Lindenstruth and Owe Philipsen and Christopher Pinke", title = "{Lattice QCD} based on {OpenCL}", journal = j-COMP-PHYS-COMM, volume = "184", number = "9", pages = 
"2042--2052", month = sep, year = "2013", CODEN = "CPHCBZ", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Mon Aug 26 14:34:22 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465513001288", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Bai:2013:SLA, author = "Mingze Bai and Shixin Sun and Hong Tang and Yusheng Dou and Glenn V. Lo", title = "An {SPMD}-Like Algorithm for Parallelizing Molecular Dynamics Using {OpenMP}", journal = j-COMPUT-SCI-ENG, volume = "15", number = "4", pages = "48--56", month = jul # "\slash " # aug, year = "2013", CODEN = "CSENFA", DOI = "https://doi.org/10.1109/MCSE.2012.66", ISSN = "1521-9615 (print), 1558-366X (electronic)", ISSN-L = "1521-9615", bibdate = "Tue Dec 3 15:39:06 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/computscieng.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Computing in Science and Engineering", journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5992", } @Article{Barkati:2013:SPA, author = "Karim Barkati and Pierre Jouvelot", title = "Synchronous programming in audio processing: a lookup table oscillator case study", journal = j-COMP-SURV, volume = "46", number = "2", pages = "24:1--24:??", month = nov, year = "2013", CODEN = "CMSVAN", DOI = "https://doi.org/10.1145/2543581.2543591", ISSN = "0360-0300 (print), 1557-7341 (electronic)", ISSN-L = "0360-0300", bibdate = "Thu Feb 6 07:35:29 MST 2014", bibsource = "http://www.acm.org/pubs/contents/journals/surveys/; https://www.math.utah.edu/pub/tex/bib/compsurv.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "The adequacy of a programming language to a given software project or application 
domain is often considered a key factor of success in software development and engineering, even though little theoretical or practical information is readily available to help make an informed decision. In this article, we address a particular version of this issue by comparing the adequacy of general-purpose synchronous programming languages to more Domain-Specific Languages (DSLs) in the field of computer music. More precisely, we implemented and tested the same lookup table oscillator example program, one of the most classical algorithms for sound synthesis, using a selection of significant synchronous programming languages, half of which designed as specific music languages --- Csound, Pure Data, SuperCollider, ChucK, Faust --- and the other half being general synchronous formalisms --- Signal, Lustre, Esterel, Lucid Synchrone and C with the OpenMP Stream Extension (Matlab/Octave is used for the initial specification). The advantages of these two approaches are discussed, providing insights to language designers and possibly software developers of both communities regarding programming languages design for the audio domain.", acknowledgement = ack-nhfb, articleno = "24", fjournal = "ACM Computing Surveys", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J204", } @Article{Berka:2013:CPC, author = "Tobias Berka and Giorgos Kollias and Helge Hagenauer and Marian Vajter{\v{s}}ic and Ananth Grama", title = "Concurrent programming constructs for parallel {MPI} applications", journal = j-J-SUPERCOMPUTING, volume = "63", number = "2", pages = "385--406", month = feb, year = "2013", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-011-0739-5", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Mon Apr 1 14:50:44 MDT 2013", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=63&issue=2; https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL =
"http://link.springer.com/article/10.1007/s11227-011-0739-5", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Article{Bland:2013:EUL, author = "Wesley Bland and Aurelien Bouteiller and Thomas Herault and Joshua Hursey and others", title = "An evaluation of {User-Level Failure Mitigation} support in {MPI}", journal = j-COMPUTING, volume = "95", number = "12", pages = "1171--1184", month = dec, year = "2013", CODEN = "CMPTA2", DOI = "https://doi.org/10.1007/s00607-013-0331-3", ISSN = "0010-485X (print), 1436-5057 (electronic)", ISSN-L = "0010-485X", bibdate = "Wed Jan 29 10:10:11 MST 2014", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0010-485X&volume=95&issue=12; https://www.math.utah.edu/pub/tex/bib/computing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/article/10.1007/s00607-013-0331-3", acknowledgement = ack-nhfb, fjournal = "Computing", journal-URL = "http://link.springer.com/journal/607", } @Article{Bland:2013:PFR, author = "Wesley Bland and Aurelien Bouteiller and Thomas Herault and George Bosilca and Jack Dongarra", title = "Post-failure recovery of {MPI} communication capability: Design and rationale", journal = j-IJHPCA, volume = "27", number = "3", pages = "244--254", month = aug, year = "2013", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342013488238", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Fri Mar 14 15:39:55 MDT 2014", bibsource = "http://hpc.sagepub.com/content/27/3.toc; https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; https://www.math.utah.edu/pub/tex/bib/ijsa.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://hpc.sagepub.com/content/27/3/244.full.pdf+html", acknowledgement = ack-nhfb, fjournal = "International Journal of High Performance Computing Applications", journal-URL =
"http://hpc.sagepub.com/content/by/year", onlinedate = "June 3, 2013", } @Article{Bland:2013:SIP, author = "Wesley Bland and Peng Du and Aurelien Bouteiller and Thomas Herault and George Bosilca and Jack J. Dongarra", title = "Special Issue Papers: Extending the scope of the {Checkpoint-on-Failure} protocol for forward recovery in standard {MPI}", journal = j-CCPE, volume = "25", number = "17", pages = "2381--2393", day = "10", month = dec, year = "2013", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.3100", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Tue Dec 3 10:37:48 MST 2013", bibsource = "http://www.interscience.wiley.com/jpages/1532-0626; https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "23 Jul 2013", } @Article{Buyukkececi:2013:POI, author = "Ferit B{\"u}y{\"u}kke{\c{c}}eci and Omar Awile and Ivo F. 
Sbalzarini", title = "A portable {OpenCL} implementation of generic particle-mesh and mesh-particle interpolation in {$2$D} and {$3$D}", journal = j-PARALLEL-COMPUTING, volume = "39", number = "2", pages = "94--111", month = feb, year = "2013", CODEN = "PACOEJ", DOI = "https://doi.org/10.1016/j.parco.2012.12.001", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Thu Feb 28 07:26:40 MST 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819112000920", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @TechReport{Cao:2013:CHP, author = "Chongxiao Cao and Jack Dongarra and Peng Du and Mark Gates and Piotr Luszczek and Stanimire Tomov", title = "{clMAGMA}: High Performance Dense Linear Algebra with {OpenCL}", type = "LAPACK Working Note", number = "275", institution = inst-UTK-CS, address = inst-UTK-CS:adr, month = mar, year = "2013", bibdate = "Sun May 5 11:20:19 2013", bibsource = "https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; https://www.math.utah.edu/pub/tex/bib/lawn.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.netlib.org/lapack/lawnspdf/lawn275.pdf", acknowledgement = ack-nhfb, utknumber = "UT-CS-13-706", } @Article{Chang:2013:PDS, author = "Yao-Lin Chang and I-Lun Tseng", title = "A parallel dual-scanline algorithm for partitioning parameterized 45-degree polygons", journal = j-TODAES, volume = "18", number = "4", pages = "59:1--59:??", month = oct, year = "2013", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/2505015", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Fri Nov 8 11:45:54 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/todaes/; https://www.math.utah.edu/pub/tex/bib/pvm.bib; 
https://www.math.utah.edu/pub/tex/bib/todaes.bib", abstract = "In order to use rectangular corner stitching data structures in storing parameterized orthogonal layouts, parameterized polygons in the layouts must be partitioned into rectangles. Likewise, in order to use trapezoidal corner stitching data structures in storing parameterized 45-degree layouts, parameterized polygons in the layouts have to be partitioned into trapezoids. In this article, a parallel polygon partitioning algorithm is proposed; the algorithm is capable of partitioning parameterized orthogonal polygons into parameterized rectangles as well as partitioning parameterized 45-degree polygons into parameterized trapezoids. Additionally, the algorithm can be used to partition fixed-coordinate polygons. By adopting the dual-scanline technique, which involves using two scanlines to concurrently sweep an input polygon, the parallel partitioning algorithm can process vertices and edges of the input polygon efficiently. The parallel polygon partitioning algorithm has been implemented in C++ with the use of OpenMP. 
Compared with a sequential partitioning program which uses a single scanline, our parallel partitioning program can achieve 20\% to 30\% speedup while partitioning large parameterized polygons or partitioning parameterized polygons with complex constraints.", acknowledgement = ack-nhfb, articleno = "59", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776", } @Article{Chen:2013:IRM, author = "Zhezhe Chen and Qi Gao and Wenbin Zhang and Feng Qin", title = "Improving the Reliability of {MPI} Libraries via Message Flow Checking", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "24", number = "3", pages = "535--549", month = mar, year = "2013", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/TPDS.2012.127", ISSN = "1045-9219", ISSN-L = "1045-9219", bibdate = "Wed May 1 08:02:21 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "IEEE Transactions on Parallel and Distributed Systems", journal-URL = "http://www.computer.org/tpds/archives.htm", } @Article{Czapinski:2013:EPM, author = "Michal Czapi{\'n}ski", title = "An effective {Parallel Multistart Tabu Search for Quadratic Assignment Problem} on {CUDA} platform", journal = j-J-PAR-DIST-COMP, volume = "73", number = "11", pages = "1461--1468", month = nov, year = "2013", CODEN = "JPDCER", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Mon Sep 23 11:46:28 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sciencedirect.com/science/journal/07437315", URL = "http://www.sciencedirect.com/science/article/pii/S074373151200175X", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } 
@Article{Dang:2013:CES,
  author = "Hoang-Vu Dang and Bertil Schmidt",
  title = "{CUDA}-enabled Sparse Matrix-Vector Multiplication on {GPUs}
    using atomic operations",
  journal = j-PARALLEL-COMPUTING,
  volume = "39",
  number = "11",
  pages = "737--750",
  month = nov,
  year = "2013",
  CODEN = "PACOEJ",
  ISSN = "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L = "0167-8191",
  bibdate = "Fri Nov 29 10:01:37 MST 2013",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
    https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://www.sciencedirect.com/science/article/pii/S0167819113001178",
  acknowledgement = ack-nhfb,
  fjournal = "Parallel Computing",
  journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
}

@Article{Demidov:2013:PCO,
  author = "Denis Demidov and Karsten Ahnert and Karl Rupp and Peter
    Gottschling",
  title = "Programming {CUDA} and {OpenCL}: a Case Study Using Modern
    {C++} Libraries",
  journal = j-SIAM-J-SCI-COMP,
  volume = "35",
  number = "5",
  pages = "C453--C472",
  month = "????",
  year = "2013",
  CODEN = "SJOCE3",
  DOI = "https://doi.org/10.1137/120903683",
  ISSN = "1064-8275 (print), 1095-7197 (electronic)",
  ISSN-L = "1064-8275",
  bibdate = "Fri Mar 7 10:32:43 MST 2014",
  bibsource = "http://epubs.siam.org/toc/sjoce3/35/5;
    https://www.math.utah.edu/pub/tex/bib/pvm.bib;
    https://www.math.utah.edu/pub/tex/bib/siamjscicomput.bib",
  acknowledgement = ack-nhfb,
  fjournal = "SIAM Journal on Scientific Computing",
  journal-URL = "http://epubs.siam.org/sisc",
  onlinedate = "January 2013",
}

@Article{Deo:2013:PSA,
  author = "Mrinal Deo and Sean Keely",
  title = "Parallel suffix array and least common prefix for the {GPU}",
  journal = j-SIGPLAN,
  volume = "48",
  number = "8",
  pages = "197--206",
  month = aug,
  year = "2013",
  CODEN = "SINODQ",
  DOI = "https://doi.org/10.1145/2517327.2442536",
  ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)",
  ISSN-L = "0362-1340",
  bibdate = "Mon Aug 26 13:48:51 MDT 2013",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
    https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
    https://www.math.utah.edu/pub/tex/bib/string-matching.bib",
  note = "PPoPP '13 Conference proceedings.",
  abstract = "Suffix Array (SA) is a data structure formed by sorting the
    suffixes of a string into lexicographic order. SAs have been used in
    a variety of applications, most notably in pattern matching and
    Burrows--Wheeler Transform (BWT) based lossless data compression. SAs
    have also become the data structure of choice for many, if not all,
    string processing problems to which suffix tree methodology is
    applicable. Over the last two decades researchers have proposed many
    suffix array construction algorithm (SACAs). We do a systematic study
    of the main classes of SACAs with the intent of mapping them onto a
    data parallel architecture like the GPU. We conclude that skew
    algorithm [12], a linear time recursive algorithm, is the best
    candidate for GPUs as all its phases can be efficiently mapped to a
    data parallel hardware. Our OpenCL implementation of skew algorithm
    achieves a throughput of up to 25 MStrings/sec and a speedup of up to
    34x and 5.8x over a single threaded CPU implementation using a
    discrete GPU and APU respectively. We also compare our OpenCL
    implementation against the fastest known CPU implementation based on
    induced copying and achieve a speedup of up to 3.7x. Using SA we
    construct BWT on GPU and achieve a speedup of 11x over the fastest
    known BWT on GPU. Suffix arrays are often augmented with the longest
    common prefix (LCP) information. We design a novel high-performance
    parallel algorithm for computing LCP on the GPU. Our GPU
    implementation of LCP achieves a speedup of up to 25x and 4.3x on
    discrete GPU and APU respectively.",
  acknowledgement = ack-nhfb,
  fjournal = "ACM SIGPLAN Notices",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
}

@Article{Ellingson:2013:SNU,
  author = "Sally R. Ellingson and Jeremy C.
Smith and Jerome Baudry",
  title = "Software News and Updates: {VinaMPI}: {Facilitating} multiple
    receptor high-throughput virtual docking on high-performance
    computers",
  journal = j-J-COMPUT-CHEM,
  volume = "34",
  number = "25",
  pages = "2212--2221",
  day = "30",
  month = sep,
  year = "2013",
  CODEN = "JCCHDD",
  DOI = "https://doi.org/10.1002/jcc.23367",
  ISSN = "0192-8651 (print), 1096-987X (electronic)",
  ISSN-L = "0192-8651",
  bibdate = "Wed Nov 13 14:32:36 MST 2013",
  bibsource = "http://www.interscience.wiley.com/jpages/0192-8651;
    https://www.math.utah.edu/pub/tex/bib/jcomputchem2010.bib;
    https://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www3.interscience.wiley.com/journalfinder.html",
  acknowledgement = ack-nhfb,
  fjournal = "Journal of Computational Chemistry",
  journal-URL = "http://onlinelibrary.wiley.com/journal/10.1002/(ISSN)1096-987X",
  onlinedate = "29 Jun 2013",
}

@Article{Friedley:2013:OPE,
  author = "Andrew Friedley and Torsten Hoefler and Greg Bronevetsky and
    Andrew Lumsdaine and Ching-Chen Ma",
  title = "Ownership passing: efficient distributed memory programming on
    multi-core systems",
  journal = j-SIGPLAN,
  volume = "48",
  number = "8",
  pages = "177--186",
  month = aug,
  year = "2013",
  CODEN = "SINODQ",
  DOI = "https://doi.org/10.1145/2517327.2442534",
  ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)",
  ISSN-L = "0362-1340",
  bibdate = "Mon Aug 26 13:48:51 MDT 2013",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
    https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  note = "PPoPP '13 Conference proceedings.",
  abstract = "The number of cores in multi- and many-core high-performance
    processors is steadily increasing. MPI, the de-facto standard for
    programming high-performance computing systems offers a distributed
    memory programming model. MPI's semantics force a copy from one
    process' send buffer to another process' receive buffer. This makes
    it difficult to achieve the same performance on modern hardware than
    shared memory programs which are arguably harder to maintain and
    debug. We propose generalizing MPI's communication model to include
    ownership passing, which make it possible to fully leverage the
    shared memory hardware of multi- and many-core CPUs to stream
    communicated data concurrently with the receiver's computations on
    it. The benefits and simplicity of message passing are retained by
    extending MPI with calls to send (pass) ownership of memory regions,
    instead of their contents, between processes. Ownership passing is
    achieved with a hybrid MPI implementation that runs MPI processes as
    threads and is mostly transparent to the user. We propose an API and
    a static analysis technique to transform legacy MPI codes
    automatically and transparently to the programmer, demonstrating that
    this scheme is easy to use in practice. Using the ownership passing
    technique, we see up to 51\% communication speedups over a standard
    message passing implementation on state-of-the art multicore systems.
    Our analysis and interface will lay the groundwork for future
    development of MPI-aware optimizing compilers and multi-core specific
    optimizations, which will be key for success in current and
    next-generation computing platforms.",
  acknowledgement = ack-nhfb,
  fjournal = "ACM SIGPLAN Notices",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
}

@Article{Gao:2013:GGA,
  author = "Mingcen Gao and Thanh-Tung Cao and Ashwin Nanjappa and
    Tiow-Seng Tan and Zhiyong Huang",
  title = "{gHull}: a {GPU} algorithm for {$3$D} convex hull",
  journal = j-TOMS,
  volume = "40",
  number = "1",
  pages = "3:1--3:19",
  month = sep,
  year = "2013",
  CODEN = "ACMSCU",
  DOI = "https://doi.org/10.1145/2513109.2513112",
  ISSN = "0098-3500 (print), 1557-7295 (electronic)",
  ISSN-L = "0098-3500",
  bibdate = "Mon Sep 30 16:05:58 MDT 2013",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
    https://www.math.utah.edu/pub/tex/bib/toms.bib",
  abstract = "A novel algorithm is presented to compute the convex hull of
    a point set in R$^3$ using the graphics processing unit (GPU). By
    exploiting the relationship between the Voronoi diagram and the
    convex hull, the algorithm derives the approximation of the convex
    hull from the former. The other extreme vertices of the convex hull
    are then found by using a two-round checking in the digital and the
    continuous space successively. The algorithm does not need explicit
    locking or any other concurrency control mechanism, thus it can
    maximize the parallelism available on the modern GPU. The
    implementation using the CUDA programming model on NVIDIA GPUs is
    exact and efficient. The experiments show that it is up to an order
    of magnitude faster than other sequential convex hull implementations
    running on the CPU for inputs of millions of points.
The works demonstrate that the GPU can be used to solve nontrivial
    computational geometry problems with significant performance
    benefit.",
  acknowledgement = ack-nhfb,
  articleno = "3",
  fjournal = "ACM Transactions on Mathematical Software (TOMS)",
  journal-URL = "http://dl.acm.org/pub.cfm?id=J782",
}

@Article{Gardner:2013:CCE,
  author = "Mark Gardner and Paul Sathre and Wu-chun Feng and Gabriel
    Martinez",
  title = "Characterizing the challenges and evaluating the efficacy of a
    {CUDA-to-OpenCL} translator",
  journal = j-PARALLEL-COMPUTING,
  volume = "39",
  number = "12",
  pages = "769--786",
  month = dec,
  year = "2013",
  CODEN = "PACOEJ",
  ISSN = "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L = "0167-8191",
  bibdate = "Tue Dec 3 18:06:48 MST 2013",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
    https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://www.sciencedirect.com/science/article/pii/S0167819113001075",
  acknowledgement = ack-nhfb,
  fjournal = "Parallel Computing",
  journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
}

@Article{Goglin:2013:KGS,
  author = "Brice Goglin and St{\'e}phanie Moreaud",
  title = "{KNEM}: a generic and scalable kernel-assisted intra-node {MPI}
    communication framework",
  journal = j-J-PAR-DIST-COMP,
  volume = "73",
  number = "2",
  pages = "176--188",
  month = feb,
  year = "2013",
  CODEN = "JPDCER",
  DOI = "https://doi.org/10.1016/j.jpdc.2012.09.016",
  ISSN = "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L = "0743-7315",
  bibdate = "Thu Dec 13 20:22:17 MST 2012",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
    https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
    https://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.sciencedirect.com/science/journal/07437315",
  URL = "http://www.sciencedirect.com/science/article/pii/S0743731512002316",
  acknowledgement = ack-nhfb,
  fjournal = "Journal of Parallel and Distributed Computing",
  journal-URL = "http://www.sciencedirect.com/science/journal/07437315",
}

@Article{Grasso:2013:APS,
  author = "Ivan Grasso and Klaus Kofler and Biagio Cosenza and Thomas
    Fahringer",
  title = "Automatic problem size sensitive task partitioning on
    heterogeneous parallel systems",
  journal = j-SIGPLAN,
  volume = "48",
  number = "8",
  pages = "281--282",
  month = aug,
  year = "2013",
  CODEN = "SINODQ",
  DOI = "https://doi.org/10.1145/2517327.2442545",
  ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)",
  ISSN-L = "0362-1340",
  bibdate = "Mon Aug 26 13:48:51 MDT 2013",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
    https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  note = "PPoPP '13 Conference proceedings.",
  abstract = "In this paper we propose a novel approach which automatizes
    task partitioning in heterogeneous systems. Our framework is based on
    the Insieme Compiler and Runtime infrastructure. The compiler
    translates a single-device OpenCL program into a multi-device OpenCL
    program. The runtime system then performs dynamic task partitioning
    based on an offline-generated prediction model. In order to derive
    the prediction model, we use a machine learning approach that
    incorporates static program features as well as dynamic, input
    sensitive features. Our approach has been evaluated over a suite of
    23 programs and achieves performance improvements compared to an
    execution of the benchmarks on a single CPU and a single GPU only.",
  acknowledgement = ack-nhfb,
  fjournal = "ACM SIGPLAN Notices",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
}

@Article{Gu:2013:PCI,
  author = "Zheng Gu and Matthew Small and Xin Yuan and Aniruddha Marathe
    and David K.
Lowenthal",
  title = "Protocol Customization for Improving {MPI} Performance on
    {RDMA}-Enabled Clusters",
  journal = j-INT-J-PARALLEL-PROG,
  volume = "41",
  number = "5",
  pages = "682--703",
  month = oct,
  year = "2013",
  CODEN = "IJPPE5",
  DOI = "https://doi.org/10.1007/s10766-013-0242-0",
  ISSN = "0885-7458 (print), 1573-7640 (electronic)",
  ISSN-L = "0885-7458",
  bibdate = "Sat Jun 22 12:29:26 MDT 2013",
  bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=41&issue=5;
    https://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib;
    https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://link.springer.com/article/10.1007/s10766-013-0242-0",
  acknowledgement = ack-nhfb,
  fjournal = "International Journal of Parallel Programming",
  journal-URL = "http://link.springer.com/journal/10766",
}

@Article{Hadi:2013:CFA,
  author = "Mohammed F. Hadi and Seyed A. Esmaeili",
  title = "{CUDA Fortran} acceleration for the finite-difference
    time-domain method",
  journal = j-COMP-PHYS-COMM,
  volume = "184",
  number = "5",
  pages = "1395--1400",
  month = may,
  year = "2013",
  CODEN = "CPHCBZ",
  ISSN = "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L = "0010-4655",
  bibdate = "Wed Mar 27 05:55:10 MDT 2013",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
    https://www.math.utah.edu/pub/tex/bib/fortran3.bib;
    https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://www.sciencedirect.com/science/article/pii/S0010465513000118",
  acknowledgement = ack-nhfb,
  fjournal = "Computer Physics Communications",
  journal-URL = "http://www.sciencedirect.com/science/journal/00104655",
}

@Article{Heimel:2013:HOP,
  author = "Max Heimel and Michael Saecker and Holger Pirk and Stefan
    Manegold and Volker Markl",
  title = "Hardware-oblivious parallelism for in-memory column-stores",
  journal = j-PROC-VLDB-ENDOWMENT,
  volume = "6",
  number = "9",
  pages = "709--720",
  month = jul,
  year = "2013",
  CODEN = "????",
  ISSN = "2150-8097",
  bibdate = "Fri Dec 13 05:56:46 MST 2013",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
    https://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract = "The multi-core architectures of today's computer systems
    make parallelism a necessity for performance critical applications.
    Writing such applications in a generic, hardware-oblivious manner is
    a challenging problem: Current database systems thus rely on
    labor-intensive and error-prone manual tuning to exploit the full
    potential of modern parallel hardware architectures like multi-core
    CPUs and graphics cards. We propose an alternative design for a
    parallel database engine, based on a single set of hardware-oblivious
    operators, which are compiled down to the actual hardware at runtime.
    This design reduces the development overhead for parallel database
    engines, while achieving competitive performance to hand-tuned
    systems. We provide a proof-of-concept for this design by integrating
    operators written using the parallel programming framework OpenCL
    into the open-source database MonetDB. Following this approach, we
    achieve efficient, yet highly portable parallel code without the need
    for optimization by hand. We evaluated our implementation against
    MonetDB using TPC-H derived queries and observed a performance that
    rivals that of MonetDB's query execution on the CPU and surpasses it
    on the GPU. In addition, we show that the same set of operators runs
    nearly unchanged on a GPU, demonstrating the feasibility of our
    approach.",
  acknowledgement = ack-nhfb,
  fjournal = "Proceedings of the VLDB Endowment",
}

@Article{Hilbrich:2013:MRE,
  author = "Tobias Hilbrich and Joachim Protze and Martin Schulz and
    Bronis R. de Supinski and Matthias S. M{\"u}ller",
  title = "{MPI} runtime error detection with {MUST}: {Advances} in
    deadlock detection",
  journal = j-SCI-PROG,
  volume = "21",
  number = "3--4",
  pages = "109--121",
  month = "????",
  year = "2013",
  CODEN = "SCIPEV",
  DOI = "https://doi.org/10.3233/SPR-130368",
  ISSN = "1058-9244 (print), 1875-919X (electronic)",
  ISSN-L = "1058-9244",
  bibdate = "Sat Mar 8 14:11:02 MST 2014",
  bibsource = "http://www.iospress.nl/;
    https://www.math.utah.edu/pub/tex/bib/pvm.bib;
    https://www.math.utah.edu/pub/tex/bib/sciprogram.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Scientific Programming",
  journal-URL = "http://iospress.metapress.com/content/1058-9244",
}

%%% Only the first five authors are recorded here; "and others" is the
%%% BibTeX marker for the unlisted remainder of the author list.
@Article{Hoefler:2013:MMN,
  author = "Torsten Hoefler and James Dinan and Darius Buntinas and Pavan
    Balaji and Brian Barrett and others",
  title = "{MPI $+$ MPI}: a new hybrid approach to parallel programming
    with {MPI} plus shared memory",
  journal = j-COMPUTING,
  volume = "95",
  number = "12",
  pages = "1121--1136",
  month = dec,
  year = "2013",
  CODEN = "CMPTA2",
  DOI = "https://doi.org/10.1007/s00607-013-0324-2",
  ISSN = "0010-485X (print), 1436-5057 (electronic)",
  ISSN-L = "0010-485X",
  bibdate = "Wed Jan 29 10:10:11 MST 2014",
  bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0010-485X&volume=95&issue=12;
    https://www.math.utah.edu/pub/tex/bib/computing.bib;
    https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://link.springer.com/article/10.1007/s00607-013-0324-2",
  acknowledgement = ack-nhfb,
  fjournal = "Computing",
  journal-URL = "http://link.springer.com/journal/607",
}

@Article{Hogg:2013:FDT,
  author = "J. D.
Hogg",
  title = "A Fast Dense Triangular Solve in {CUDA}",
  journal = j-SIAM-J-SCI-COMP,
  volume = "35",
  number = "3",
  pages = "C303--C322",
  month = "????",
  year = "2013",
  CODEN = "SJOCE3",
  DOI = "https://doi.org/10.1137/12088358X",
  ISSN = "1064-8275 (print), 1095-7197 (electronic)",
  ISSN-L = "1064-8275",
  bibdate = "Fri Jul 19 07:43:53 MDT 2013",
  bibsource = "http://epubs.siam.org/sam-bin/dbq/toc/SISC/35/3;
    https://www.math.utah.edu/pub/tex/bib/pvm.bib;
    https://www.math.utah.edu/pub/tex/bib/siamjscicomput.bib",
  acknowledgement = ack-nhfb,
  fjournal = "SIAM Journal on Scientific Computing",
  journal-URL = "http://epubs.siam.org/sisc",
  onlinedate = "January 2013",
}

@Article{Huang:2013:ACM,
  author = "Libo Huang and Zhiying Wang and Nong Xiao and Yongwen Wang and
    Qiang Dou",
  title = "Adaptive communication mechanism for accelerating {MPI}
    functions in {NoC}-based multicore processors",
  journal = j-TACO,
  volume = "10",
  number = "3",
  pages = "18:1--18:??",
  month = sep,
  year = "2013",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/2512434",
  ISSN = "1544-3566 (print), 1544-3973 (electronic)",
  ISSN-L = "1544-3566",
  bibdate = "Mon Sep 16 17:20:12 MDT 2013",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
    https://www.math.utah.edu/pub/tex/bib/taco.bib",
  abstract = "Multicore designs have emerged as the dominant organization
    for future high-performance microprocessors. Communication in such
    designs is often enabled by Networks-on-Chip (NoCs). A new trend in
    such architectures is to fit a Message Passing Interface (MPI)
    programming model on NoCs to achieve optimal parallel application
    performance. A key issue in designing MPI over NoCs is communication
    protocol, which has not been explored in previous research. This
    article advocates a hardware-supported communication mechanism using
    a protocol-adaptive approach to adjust to varying NoC configurations
    (e.g., number of buffers) and workload behavior (e.g., number of
    messages). We propose the ADaptive Communication Mechanism (ADCM), a
    hybrid protocol that involves behavior similar to buffered
    communication when sufficient buffer is available in the receiver to
    that similar to a synchronous protocol when buffers in the receiver
    are limited. ADCM adapts dynamically by deciding communication
    protocol on a per-request basis using a local estimate of recent
    buffer utilization. ADCM attempts to combine both the advantages of
    buffered and synchronous communication modes to achieve enhanced
    throughput and performance. Simulations of various workloads show
    that the proposed communication mechanism can be effectively used in
    future NoC designs.",
  acknowledgement = ack-nhfb,
  articleno = "18",
  fjournal = "ACM Transactions on Architecture and Code Optimization (TACO)",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J924",
}

@Article{Jimenez:2013:BCA,
  author = "Jes{\'u}s Jim{\'e}nez and Juan {Ruiz de Miras}",
  title = "Box-counting algorithm on {GPU} and multi-core {CPU}: an
    {OpenCL} cross-platform study",
  journal = j-J-SUPERCOMPUTING,
  volume = "65",
  number = "3",
  pages = "1327--1352",
  month = sep,
  year = "2013",
  CODEN = "JOSUED",
  DOI = "https://doi.org/10.1007/s11227-013-0885-z",
  ISSN = "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L = "0920-8542",
  bibdate = "Sat Feb 8 11:06:43 MST 2014",
  bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=65&issue=3;
    https://www.math.utah.edu/pub/tex/bib/jsuper.bib;
    https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://link.springer.com/article/10.1007/s11227-013-0885-z",
  acknowledgement = ack-nhfb,
  fjournal = "The Journal of Supercomputing",
  journal-URL = "http://link.springer.com/journal/11227",
}

@Article{Jin:2013:PCU,
  author = "Hui Jin and Xian-He Sun",
  title = "Performance comparison under failures of {MPI} and
    {MapReduce}: an analytical approach",
  journal = j-FUT-GEN-COMP-SYS,
  volume = "29",
  number = "7",
  pages = "1808--1815",
  month = sep,
  year = "2013",
  CODEN = "FGSEVI",
  ISSN = "0167-739X (print), 1872-7115 (electronic)",
  ISSN-L = "0167-739X",
  bibdate = "Mon Aug 26 16:08:23 MDT 2013",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/futgencompsys.bib;
    https://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.sciencedirect.com/science/journal/0167739X",
  URL = "http://www.sciencedirect.com/science/article/pii/S0167739X13000290",
  acknowledgement = ack-nhfb,
  fjournal = "Future Generation Computer Systems",
  journal-URL = "http://www.sciencedirect.com/science/journal/0167739X",
}

@Article{Jog:2013:OCT,
  author = "Adwait Jog and Onur Kayiran and Nachiappan Chidambaram
    Nachiappan and Asit K. Mishra and Mahmut T. Kandemir and Onur Mutlu
    and Ravishankar Iyer and Chita R. Das",
  title = "{OWL}: cooperative thread array aware scheduling techniques for
    improving {GPGPU} performance",
  journal = j-SIGPLAN,
  volume = "48",
  number = "4",
  pages = "395--406",
  month = apr,
  year = "2013",
  CODEN = "SINODQ",
  DOI = "https://doi.org/10.1145/2499368.2451158",
  ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)",
  ISSN-L = "0362-1340",
  bibdate = "Mon Jul 1 17:15:23 MDT 2013",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
    https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract = "Emerging GPGPU architectures, along with programming models
    like CUDA and OpenCL, offer a cost-effective platform for many
    applications by providing high thread level parallelism at lower
    energy budgets. Unfortunately, for many general-purpose applications,
    available hardware resources of a GPGPU are not efficiently utilized,
    leading to lost opportunity in improving performance. A major cause
    of this is the inefficiency of current warp scheduling policies in
    tolerating long memory latencies. In this paper, we identify that the
    scheduling decisions made by such policies are agnostic to
    thread-block, or cooperative thread array (CTA), behavior, and as a
    result inefficient.
We present a coordinated CTA-aware scheduling policy that utilizes four
    schemes to minimize the impact of long memory latencies. The first
    two schemes, CTA-aware two-level warp scheduling and locality aware
    warp scheduling, enhance per-core performance by effectively reducing
    cache contention and improving latency hiding capability. The third
    scheme, bank-level parallelism aware warp scheduling, improves
    overall GPGPU performance by enhancing DRAM bank-level parallelism.
    The fourth scheme employs opportunistic memory-side prefetching to
    further enhance performance by taking advantage of open DRAM rows.
    Evaluations on a 28-core GPGPU platform with highly memory-intensive
    applications indicate that our proposed mechanism can provide 33\%
    average performance improvement compared to the commonly-employed
    round-robin warp scheduling policy.",
  acknowledgement = ack-nhfb,
  fjournal = "ACM SIGPLAN Notices",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark = "ASPLOS '13 conference proceedings.",
}

@Article{Kegel:2013:DTU,
  author = "Philipp Kegel and Michel Steuwer and Sergei Gorlatch",
  title = "{dOpenCL}: Towards uniform programming of distributed
    heterogeneous multi-\slash many-core systems",
  journal = j-J-PAR-DIST-COMP,
  volume = "73",
  number = "12",
  pages = "1639--1648",
  month = dec,
  year = "2013",
  CODEN = "JPDCER",
  ISSN = "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L = "0743-7315",
  bibdate = "Fri Nov 29 09:55:28 MST 2013",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
    https://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.sciencedirect.com/science/journal/07437315",
  URL = "http://www.sciencedirect.com/science/article/pii/S0743731513001597",
  acknowledgement = ack-nhfb,
  fjournal = "Journal of Parallel and Distributed Computing",
  journal-URL = "http://www.sciencedirect.com/science/journal/07437315",
}

@Article{Khanna:2013:HPN,
  author = "Gaurav Khanna",
  title = "High-Precision Numerical Simulations on a {CUDA GPU}: {Kerr}
Black Hole Tails",
  journal = j-J-SCI-COMPUT,
  volume = "56",
  number = "2",
  pages = "366--380",
  month = aug,
  year = "2013",
  CODEN = "JSCOEB",
  DOI = "https://doi.org/10.1007/s10915-012-9679-3",
  ISSN = "0885-7474 (print), 1573-7691 (electronic)",
  ISSN-L = "0885-7474",
  bibdate = "Sat Mar 8 11:16:21 MST 2014",
  bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7474&volume=56&issue=2;
    https://www.math.utah.edu/pub/tex/bib/jscicomput.bib;
    https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://link.springer.com/article/10.1007/s10915-012-9679-3;
    http://link.springer.com/content/pdf/10.1007/s10915-012-9679-3.pdf",
  acknowledgement = ack-nhfb,
  fjournal = "Journal of Scientific Computing",
  journal-URL = "http://link.springer.com/journal/10915",
}

@Article{Kim:2013:MPE,
  author = "Yooseong Kim and Aviral Shrivastava",
  title = "Memory performance estimation of {CUDA} programs",
  journal = j-TECS,
  volume = "13",
  number = "2",
  pages = "21:1--21:??",
  month = sep,
  year = "2013",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/2514641.2514648",
  ISSN = "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L = "1539-9087",
  bibdate = "Fri Sep 27 18:13:13 MDT 2013",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract = "CUDA has successfully popularized GPU computing, and GPGPU
    applications are now used in various embedded systems. The CUDA
    programming model provides a simple interface to program on GPUs, but
    tuning GPGPU applications for high performance is still quite
    challenging. Programmers need to consider numerous architectural
    details, and small changes in source code, especially on the memory
    access pattern, can affect performance significantly. This makes it
    very difficult to optimize CUDA programs. This article presents
    CuMAPz, which is a tool to analyze and compare the memory performance
    of CUDA programs. CuMAPz can help programmers explore different ways
    of using shared and global memories, and optimize their program for
    efficient memory behavior. CuMAPz models several
    memory-performance-related factors: data reuse, global memory access
    coalescing, global memory latency hiding, shared memory bank
    conflict, channel skew, and branch divergence. Experimental results
    show that CuMAPz can accurately estimate performance with correlation
    coefficient of 0.96. By using CuMAPz to explore the memory access
    design space, we could improve the performance of our benchmarks by
    30\% more than the previous approach [Hong and Kim 2010].",
  acknowledgement = ack-nhfb,
  articleno = "21",
  fjournal = "ACM Transactions on Embedded Computing Systems",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Krotkiewski:2013:ESC,
  author = "Marcin Krotkiewski and Marcin Dabrowski",
  title = "Efficient {$3$D} stencil computations using {CUDA}",
  journal = j-PARALLEL-COMPUTING,
  volume = "39",
  number = "10",
  pages = "533--548",
  month = oct,
  year = "2013",
  CODEN = "PACOEJ",
  ISSN = "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L = "0167-8191",
  bibdate = "Mon Sep 30 16:37:36 MDT 2013",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
    https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://www.sciencedirect.com/science/article/pii/S016781911300094X",
  acknowledgement = ack-nhfb,
  fjournal = "Parallel Computing",
  journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
}

@Article{Kruzel:2013:VOI,
  author = "Filip Kruzel and Krzysztof Bana{\'s}",
  title = "Vectorized {OpenCL} implementation of numerical integration for
    higher order finite elements",
  journal = j-COMPUT-MATH-APPL,
  volume = "66",
  number = "10",
  pages = "2030--2044",
  month = dec,
  year = "2013",
  CODEN = "CMAPDK",
  ISSN = "0898-1221 (print), 1873-7668 (electronic)",
  ISSN-L = "0898-1221",
  bibdate = "Wed Mar 1 21:51:22 MST 2017",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/computmathappl2010.bib;
    https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://www.sciencedirect.com/science/article/pii/S089812211300521X",
  acknowledgement = ack-nhfb,
  fjournal = "Computers and Mathematics with Applications",
  journal-URL = "http://www.sciencedirect.com/science/journal/08981221",
}

@Article{Kuckuk:2013:IPD,
  author = "Sebastian Kuckuk and Tobias Preclik and Harald K{\"o}stler",
  title = "Interactive particle dynamics using {OpenCL} and {Kinect}",
  journal = j-INT-J-PAR-EMER-DIST-SYS,
  volume = "28",
  number = "6",
  pages = "519--536",
  year = "2013",
  DOI = "https://doi.org/10.1080/17445760.2012.745671",
  bibdate = "Thu Mar 6 05:45:37 MST 2014",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/intjparemerdistsys.bib;
    https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "International Journal of Parallel, Emergent and Distributed
    Systems: IJPEDS",
  journal-URL = "http://www.tandfonline.com/loi/gpaa20",
}

%%% The DOI prefix 10.1142 is World Scientific's, so the article is linked
%%% through doi.org and the publisher site rather than the ACM resolver.
@Article{Kumar:2013:GAI,
  author = "Piyush Kumar and Anupam Agrawal",
  title = "{GPU}-Accelerated Interactive Visualization of {$ 3 D $}
    Volumetric Data Using {CUDA}",
  journal = j-INT-J-IMAGE-GRAPHICS,
  volume = "13",
  number = "2",
  pages = "??--??",
  month = apr,
  year = "2013",
  CODEN = "????",
  DOI = "https://doi.org/10.1142/S0219467813400032",
  ISSN = "0219-4678",
  ISSN-L = "0219-4678",
  bibdate = "Tue Aug 6 10:37:51 MDT 2013",
  bibsource = "http://ejournals.wspc.com.sg/ijig/ijig.shtml;
    https://www.math.utah.edu/pub/tex/bib/ijig.bib;
    https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "https://www.worldscientific.com/doi/abs/10.1142/S0219467813400032",
  acknowledgement = ack-nhfb,
  fjournal = "International Journal of Image and Graphics (IJIG)",
  journal-URL = "http://www.worldscientific.com/worldscinet/ijig",
}

@Article{Kunaseth:2013:ASD,
  author = "Manaschai Kunaseth and David F. Richards and James N.
Glosli",
  title = "Analysis of scalable data-privatization threading algorithms
    for hybrid {MPI\slash OpenMP} parallelization of molecular dynamics",
  journal = j-J-SUPERCOMPUTING,
  volume = "66",
  number = "1",
  pages = "406--430",
  month = oct,
  year = "2013",
  CODEN = "JOSUED",
  DOI = "https://doi.org/10.1007/s11227-013-0915-x",
  ISSN = "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L = "0920-8542",
  bibdate = "Sat Feb 8 11:13:32 MST 2014",
  bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=66&issue=1;
    https://www.math.utah.edu/pub/tex/bib/jsuper.bib;
    https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://link.springer.com/article/10.1007/s11227-013-0915-x",
  acknowledgement = ack-nhfb,
  fjournal = "The Journal of Supercomputing",
  journal-URL = "http://link.springer.com/journal/11227",
}

@Article{Li:2013:COM,
  author = "Hung-Fu Li and Tyng-Yeu Liang and Jun-Yao Chiu",
  title = "A compound {OpenMP\slash MPI} program development toolkit for
    hybrid {CPU\slash GPU} clusters",
  journal = j-J-SUPERCOMPUTING,
  volume = "66",
  number = "1",
  pages = "381--405",
  month = oct,
  year = "2013",
  CODEN = "JOSUED",
  DOI = "https://doi.org/10.1007/s11227-013-0912-0",
  ISSN = "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L = "0920-8542",
  bibdate = "Sat Feb 8 11:13:32 MST 2014",
  bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=66&issue=1;
    https://www.math.utah.edu/pub/tex/bib/jsuper.bib;
    https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://link.springer.com/article/10.1007/s11227-013-0912-0",
  acknowledgement = ack-nhfb,
  fjournal = "The Journal of Supercomputing",
  journal-URL = "http://link.springer.com/journal/11227",
}

@Article{Liu:2013:DLO,
  author = "Jun Liu and Wei Ding and Ohyoung Jang and Mahmut Kandemir",
  title = "Data layout optimization for {GPGPU} architectures",
  journal = j-SIGPLAN,
  volume = "48",
  number = "8",
  pages = "283--284",
  month = aug,
  year = "2013",
  CODEN = "SINODQ",
  DOI = "https://doi.org/10.1145/2517327.2442546",
  ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)",
  ISSN-L = "0362-1340",
  bibdate = "Mon Aug 26 13:48:51 MDT 2013",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
    https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  note = "PPoPP '13 Conference proceedings.",
  abstract = "GPUs are being widely used in accelerating general-purpose
    applications, leading to the emergence of GPGPU architectures. New
    programming models, e.g., Compute Unified Device Architecture (CUDA),
    have been proposed to facilitate programming general-purpose
    computations in GPGPUs. However, writing high-performance CUDA codes
    manually is still tedious and difficult. In particular, the
    organization of the data in the memory space can greatly affect the
    performance due to the unique features of a custom GPGPU memory
    hierarchy. In this work, we propose an automatic data layout
    transformation framework to solve the key issues associated with a
    GPGPU memory hierarchy (i.e., channel skewing, data coalescing, and
    bank conflicts). Our approach employs a widely applicable strategy
    based on a novel concept called data localization. Specifically, we
    try to optimize the layout of the arrays accessed in affine loop
    nests, for both the device memory and shared memory, at both coarse
    grain and fine grain parallelization levels. We performed an
    experimental evaluation of our data layout optimization strategy
    using 15 benchmarks on an NVIDIA CUDA GPU device. The results show
    that the proposed data transformation approach brings around 4.3X
    speedup on average.",
  acknowledgement = ack-nhfb,
  fjournal = "ACM SIGPLAN Notices",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
}

@Article{Lu:2013:MLP,
  author = "Ligang Lu and Karen Magerlein",
  title = "Multi-level parallel computing of reverse time migration for
    seismic imaging on {Blue Gene/Q}",
  journal = j-SIGPLAN,
  volume = "48",
  number = "8",
  pages = "291--292",
  month = aug,
  year = "2013",
  CODEN = "SINODQ",
  DOI = "https://doi.org/10.1145/2517327.2442550",
  ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)",
  ISSN-L = "0362-1340",
  bibdate = "Mon Aug 26 13:48:51 MDT 2013",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
    https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  note = "PPoPP '13 Conference proceedings.",
  abstract = "Blue Gene/Q (BG/Q) is an early representative of increasing
    scale and thread count that will characterize future HPC systems:
    large counts of nodes, cores, and threads; and a rich programming
    environment with many degrees of freedom in parallel computing
    optimization. So it is both a challenge and an opportunity to it to
    accelerate the seismic imaging applications to the unprecedented
    levels that will significantly advance the technologies for the oil
    and gas industry. In this work we aim to address two important
    questions: how HPC systems with high levels of scale and thread count
    will perform in real applications; and how systems with many degrees
    of freedom in parallel programming can be calibrated to achieve
    optimal performance. Based on BG/Q's architecture features and RTM
    workload characteristics, we developed massive domain partition, MPI,
    and SIMD Our detailed deep analyses in various aspects of
    optimization also provide valuable experience and insights into how
    can be utilized to facilitate the advance of seismic imaging
    technologies.
Our BG/Q RTM solution achieved a 14.93x speedup over the BG/P implementation. Our multi-level parallelism strategies for Reverse Time Migration (RTM) seismic imaging computing on BG/Q provides an example of how HPC systems like BG/Q can accelerate applications to a new level.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", } @Article{Lu:2013:WGA, author = "Xiangwen Lu and Jiabin Yuan and Weiwei Zhang", title = "Workflow of the {Grover} algorithm simulation incorporating {CUDA} and {GPGPU}", journal = j-COMP-PHYS-COMM, volume = "184", number = "9", pages = "2035--2041", month = sep, year = "2013", CODEN = "CPHCBZ", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Mon Aug 26 14:34:22 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465513001148", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Ma:2013:KAT, author = "Teng Ma and George Bosilca and Aurelien Bouteiller and Jack J. 
Dongarra", title = "Kernel-assisted and topology-aware {MPI} collective communications on multicore\slash many-core platforms", journal = j-J-PAR-DIST-COMP, volume = "73", number = "7", pages = "1000--1010", month = jul, year = "2013", CODEN = "JPDCER", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Mon Aug 26 16:44:35 MDT 2013", bibsource = "https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sciencedirect.com/science/journal/07437315", URL = "http://www.sciencedirect.com/science/article/pii/S0743731513000166", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Article{Mohamed:2013:MMM, author = "Hisham Mohamed and St{\'e}phane Marchand-Maillet", title = "{MRO-MPI}: {MapReduce} overlapping using {MPI} and an optimized data exchange policy", journal = j-PARALLEL-COMPUTING, volume = "39", number = "12", pages = "851--866", month = dec, year = "2013", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Tue Dec 3 18:06:48 MST 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819113001026", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Nandivada:2013:TFO, author = "V. 
Krishna Nandivada and Jun Shirako and Jisheng Zhao and Vivek Sarkar", title = "A Transformation Framework for Optimizing Task-Parallel Programs", journal = j-TOPLAS, volume = "35", number = "1", pages = "3:1--3:??", month = apr, year = "2013", CODEN = "ATPSDT", DOI = "https://doi.org/10.1145/2450136.2450138", ISSN = "0164-0925 (print), 1558-4593 (electronic)", ISSN-L = "0164-0925", bibdate = "Tue Apr 30 18:56:06 MDT 2013", bibsource = "http://www.acm.org/pubs/contents/journals/toplas/; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/toplas.bib", abstract = "Task parallelism has increasingly become a trend with programming models such as OpenMP 3.0, Cilk, Java Concurrency, X10, Chapel and Habanero-Java (HJ) to address the requirements of multicore programmers. While task parallelism increases productivity by allowing the programmer to express multiple levels of parallelism, it can also lead to performance degradation due to increased overheads. In this article, we introduce a transformation framework for optimizing task-parallel programs with a focus on task creation and task termination operations. These operations can appear explicitly in constructs such as async, finish in X10 and HJ, task, taskwait in OpenMP 3.0, and spawn, sync in Cilk, or implicitly in composite code statements such as foreach and ateach loops in X10, forall and foreach loops in HJ, and parallel loop in OpenMP. Our framework includes a definition of data dependence in task-parallel programs, a happens-before analysis algorithm, and a range of program transformations for optimizing task parallelism. Broadly, our transformations cover three different but interrelated optimizations: (1) finish-elimination, (2) forall-coarsening, and (3) loop-chunking. 
Finish-elimination removes redundant task termination operations, forall-coarsening replaces expensive task creation and termination operations with more efficient synchronization operations, and loop-chunking extracts useful parallelism from ideal parallelism. All three optimizations are specified in an iterative transformation framework that applies a sequence of relevant transformations until a fixed point is reached. Further, we discuss the impact of exception semantics on the specified transformations, and extend them to handle task-parallel programs with precise exception semantics. Experimental results were obtained for a collection of task-parallel benchmarks on three multicore platforms: a dual-socket 128-thread (16-core) Niagara T2 system, a quad-socket 16-core Intel Xeon SMP, and a quad-socket 32-core Power7 SMP. We have observed that the proposed optimizations interact with each other in a synergistic way, and result in an overall geometric average performance improvement between 6.28$ \times $ and 10.30$ \times $, measured across all three platforms for the benchmarks studied.", acknowledgement = ack-nhfb, articleno = "3", fjournal = "ACM Transactions on Programming Languages and Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J783", } @Article{Pai:2013:IGC, author = "Sreepathi Pai and Matthew J. Thazhuthaveetil and R. Govindarajan", title = "Improving {GPGPU} concurrency with elastic kernels", journal = j-SIGPLAN, volume = "48", number = "4", pages = "407--418", month = apr, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499368.2451160", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:23 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Each new generation of GPUs vastly increases the resources available to GPGPU programs. 
GPU programming models (like CUDA) were designed to scale to use these resources. However, we find that CUDA programs actually do not scale to utilize all available resources, with over 30\% of resources going unused on average for programs of the Parboil2 suite that we used in our work. Current GPUs therefore allow concurrent execution of kernels to improve utilization. In this work, we study concurrent execution of GPU kernels using multiprogram workloads on current NVIDIA Fermi GPUs. On two-program workloads from the Parboil2 benchmark suite we find concurrent execution is often no better than serialized execution. We identify that the lack of control over resource allocation to kernels is a major serialization bottleneck. We propose transformations that convert CUDA kernels into elastic kernels which permit fine-grained control over their resource usage. We then propose several elastic-kernel aware concurrency policies that offer significantly better performance and concurrency compared to the current CUDA policy. We evaluate our proposals on real hardware using multiprogrammed workloads constructed from benchmarks in the Parboil 2 suite. On average, our proposals increase system throughput (STP) by 1.21x and improve the average normalized turnaround time (ANTT) by 3.73x for two-program workloads when compared to the current CUDA concurrency implementation.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "ASPLOS '13 conference proceedings.", } @Article{Papakonstantinou:2013:ECC, author = "Alexandros Papakonstantinou and Karthik Gururaj and John A. Stratton and Deming Chen and Jason Cong and Wen-Mei W. 
Hwu", title = "Efficient compilation of {CUDA} kernels for high-performance computing on {FPGAs}", journal = j-TECS, volume = "13", number = "2", pages = "25:1--25:??", month = sep, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2514641.2514652", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Fri Sep 27 18:13:13 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "The rise of multicore architectures across all computing domains has opened the door to heterogeneous multiprocessors, where processors of different compute characteristics can be combined to effectively boost the performance per watt of different application kernels. GPUs, in particular, are becoming very popular for speeding up compute-intensive kernels of scientific, imaging, and simulation applications. New programming models that facilitate parallel processing on heterogeneous systems containing GPUs are spreading rapidly in the computing community. By leveraging these investments, the developers of other accelerators have an opportunity to significantly reduce the programming effort by supporting those accelerator models already gaining popularity. In this work, we adapt one such language, the CUDA programming model, into a new FPGA design flow called FCUDA, which efficiently maps the coarse- and fine-grained parallelism exposed in CUDA onto the reconfigurable fabric. Our CUDA-to-FPGA flow employs AutoPilot, an advanced high-level synthesis tool (available from Xilinx) which enables high-abstraction FPGA programming. FCUDA is based on a source-to-source compilation that transforms the SIMT (Single Instruction, Multiple Thread) CUDA code into task-level parallel C code for AutoPilot. We describe the details of our CUDA-to-FPGA flow and demonstrate the highly competitive performance of the resulting customized FPGA multicore accelerators. 
To the best of our knowledge, this is the first CUDA-to-FPGA flow to demonstrate the applicability and potential advantage of using the CUDA programming model for high-performance computing in FPGAs.", acknowledgement = ack-nhfb, articleno = "25", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?&idx=J840", } @Article{Pennycook:2013:IPP, author = "S. J. Pennycook and S. D. Hammond and S. A. Wright and J. A. Herdman and I. Miller and S. A. Jarvis", title = "An investigation of the performance portability of {OpenCL}", journal = j-J-PAR-DIST-COMP, volume = "73", number = "11", pages = "1439--1450", month = nov, year = "2013", CODEN = "JPDCER", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Mon Sep 23 11:46:28 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sciencedirect.com/science/journal/07437315", URL = "http://www.sciencedirect.com/science/article/pii/S0743731512001669", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Article{Poulson:2013:ENF, author = "Jack Poulson and Bryan Marker and Robert A. van de Geijn and Jeff R. Hammond and Nichols A. 
Romero", title = "{Elemental}: a New Framework for Distributed Memory Dense Matrix Computations", journal = j-TOMS, volume = "39", number = "2", pages = "13:1--13:24", month = feb, year = "2013", CODEN = "ACMSCU", DOI = "https://doi.org/10.1145/2427023.2427030", ISSN = "0098-3500 (print), 1557-7295 (electronic)", ISSN-L = "0098-3500", bibdate = "Wed Feb 20 16:46:13 MST 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/toms.bib", abstract = "Parallelizing dense matrix computations to distributed memory architectures is a well-studied subject and generally considered to be among the best understood domains of parallel computing. Two packages, developed in the mid 1990s, still enjoy regular use: ScaLAPACK and PLAPACK. With the advent of many-core architectures, which may very well take the shape of distributed memory architectures within a single processor, these packages must be revisited since the traditional MPI-based approaches will likely need to be extended. Thus, this is a good time to review lessons learned since the introduction of these two packages and to propose a simple yet effective alternative. 
Preliminary performance results show the new solution achieves competitive, if not superior, performance on large clusters.", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Transactions on Mathematical Software (TOMS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J782", } @Article{Ragan-Kelley:2013:HLC, author = "Jonathan Ragan-Kelley and Connelly Barnes and Andrew Adams and Sylvain Paris and Fr{\'e}do Durand and Saman Amarasinghe", title = "{Halide}: a language and compiler for optimizing parallelism, locality, and recomputation in image processing pipelines", journal = j-SIGPLAN, volume = "48", number = "6", pages = "519--530", month = jun, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2499370.2462176", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:38 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Image processing pipelines combine the challenges of stencil computations and stream programs. They are composed of large graphs of different stencil stages, as well as complex reductions, and stages with global or data-dependent access patterns. Because of their complex structure, the performance difference between a naive implementation of a pipeline and an optimized one is often an order of magnitude. Efficient implementations require optimization of both parallelism and locality, but due to the nature of stencils, there is a fundamental tension between parallelism, locality, and introducing redundant recomputation of shared values. 
We present a systematic model of the tradeoff space fundamental to stencil pipelines, a schedule representation which describes concrete points in this space for each stage in an image processing pipeline, and an optimizing compiler for the Halide image processing language that synthesizes high performance implementations from a Halide algorithm and a schedule. Combining this compiler with stochastic search over the space of schedules enables terse, composable programs to achieve state-of-the-art performance on a wide range of real image processing pipelines, and across different hardware architectures, including multicores with SIMD, and heterogeneous CPU+GPU execution. From simple Halide programs written in a few hours, we demonstrate performance up to 5x faster than hand-tuned C, intrinsics, and CUDA implementations optimized by experts over weeks or months, for image processing applications beyond the reach of past automatic compilers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "PLDI '13 conference proceedings.", } @Article{Reyes:2013:PEO, author = "Ruym{\'a}n Reyes and Iv{\'a}n L{\'o}pez and Juan J. 
Fumero and Francisco de Sande", title = "A preliminary evaluation of {OpenACC} implementations", journal = j-J-SUPERCOMPUTING, volume = "65", number = "3", pages = "1063--1075", month = sep, year = "2013", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-012-0853-z", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Sat Feb 8 10:21:44 MST 2014", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=65&issue=3; https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/article/10.1007/s11227-012-0853-z", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Article{Rodrigues:2013:MAA, author = "A. Wendell O. Rodrigues and Fr{\'e}d{\'e}ric Guyomarc'h and Jean-Luc Dekeyser", title = "An {MDE} Approach for Automatic Code Generation from {UML\slash MARTE} to {OpenCL}", journal = j-COMPUT-SCI-ENG, volume = "15", number = "1", pages = "46--55", month = jan # "\slash " # feb, year = "2013", CODEN = "CSENFA", DOI = "https://doi.org/10.1109/MCSE.2012.35", ISSN = "1521-9615", ISSN-L = "1521-9615", bibdate = "Fri Jun 21 08:34:49 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/computscieng.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Computing in Science and Engineering", journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5992", } @Article{Rodrigues:2013:POM, author = "Eduardo R. Rodrigues and Philippe O. A. Navaux and Jairo Panetta and Celso L. 
Mendes", title = "Preserving the original {MPI} semantics in a virtualized processor environment", journal = j-SCI-COMPUT-PROGRAM, volume = "78", number = "4", pages = "412--421", day = "1", month = apr, year = "2013", CODEN = "SCPGD4", DOI = "https://doi.org/10.1016/j.scico.2012.07.005", ISSN = "0167-6423 (print), 1872-7964 (electronic)", ISSN-L = "0167-6423", bibdate = "Mon Feb 4 10:59:59 MST 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/scicomputprogram.bib; http://www.sciencedirect.com/science/journal/01676423", URL = "http://www.sciencedirect.com/science/article/pii/S0167642312001335", acknowledgement = ack-nhfb, fjournal = "Science of Computer Programming", journal-URL = "http://www.sciencedirect.com/science/journal/01676423", remark = "Special section on Mutation Testing and Analysis (Mutation 2010) \& Special section on the Programming Languages track at the 25th ACM Symposium on Applied Computing.", } @Article{Rosen:2013:PVA, author = "Paul Rosen", title = "Performance: A Visual Approach to Investigating Shared and Global Memory Behavior of {CUDA} Kernels", journal = j-CGF, volume = "32", number = "3pt2", pages = "161--170", month = jun, year = "2013", CODEN = "CGFODY", DOI = "https://doi.org/10.1111/cgf.12103", ISSN = "0167-7055 (print), 1467-8659 (electronic)", ISSN-L = "0167-7055", bibdate = "Sat Feb 8 15:27:43 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/cgf.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Computer Graphics Forum", journal-URL = "http://onlinelibrary.wiley.com/journal/10.1111/(ISSN)1467-8659/", onlinedate = "1 Jul 2013", } @Article{Sampaio:2013:DA, author = "Diogo Sampaio and Rafael Martins de Souza and Sylvain Collange and Fernando Magno Quint{\~a}o Pereira", title = "Divergence analysis", journal = j-TOPLAS, volume = "35", number = "4", pages = "13:1--13:??", month = dec, year = "2013", CODEN = "ATPSDT", DOI =
"https://doi.org/10.1145/2523815", ISSN = "0164-0925 (print), 1558-4593 (electronic)", ISSN-L = "0164-0925", bibdate = "Tue Dec 31 14:22:03 MST 2013", bibsource = "http://www.acm.org/pubs/contents/journals/toplas/; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/toplas.bib", abstract = "Growing interest in graphics processing units has brought renewed attention to the Single Instruction Multiple Data (SIMD) execution model. SIMD machines give application developers tremendous computational power; however, programming them is still challenging. In particular, developers must deal with memory and control-flow divergences. These phenomena stem from a condition that we call data divergence, which occurs whenever two processing elements (PEs) see the same variable name holding different values. This article introduces divergence analysis, a static analysis that discovers data divergences. This analysis, currently deployed in an industrial quality compiler, is useful in several ways: it improves the translation of SIMD code to non-SIMD CPUs, it helps developers to manually improve their SIMD applications, and it also guides the automatic optimization of SIMD programs. We demonstrate this last point by introducing the notion of a divergence-aware register spiller. This spiller uses information from our analysis to either rematerialize or share common data between PEs. As a testimony of its effectiveness, we have tested it on a suite of 395 CUDA kernels from well-known benchmarks. 
The divergence-aware spiller produces GPU code that is 26.21\% faster than the code produced by the register allocator used in the baseline compiler.", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Transactions on Programming Languages and Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J783", } @Article{Shen:2013:ACE, author = "Jie Shen and Jianbin Fang and Henk Sips and Ana Lucia Varbanescu", title = "An application-centric evaluation of {OpenCL} on multi-core {CPUs}", journal = j-PARALLEL-COMPUTING, volume = "39", number = "12", pages = "834--850", month = dec, year = "2013", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Tue Dec 3 18:06:48 MST 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819113001014", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{SM-D:2013:BRC, author = "SM-D", title = "Book Review: {{\booktitle{CUDA Programming}}, Shane Cook. Morgan Kaufmann. 
ISBN 978-0-12-415933-4}", journal = j-NETWORK-SECURITY, volume = "2013", number = "1", pages = "4--4", month = jan, year = "2013", CODEN = "NTSCF5", DOI = "https://doi.org/10.1016/S1353-4858(13)70015-1", ISSN = "1353-4858 (print), 1872-9371 (electronic)", ISSN-L = "1353-4858", bibdate = "Mon Dec 4 17:00:50 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/network-security.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S1353485813700151", acknowledgement = ack-nhfb, fjournal = "Network Security", journal-URL = "https://www.sciencedirect.com/journal/network-security", } @Article{Totoni:2013:EFE, author = "Ehsan Totoni and Mert Dikmen and Mar{\'\i}a Jes{\'u}s Garzar{\'a}n", title = "Easy, fast, and energy-efficient object detection on heterogeneous on-chip architectures", journal = j-TACO, volume = "10", number = "4", pages = "45:1--45:??", month = dec, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2541228.2555302", ISSN = "1544-3566 (print), 1544-3973 (electronic)", ISSN-L = "1544-3566", bibdate = "Thu Jan 9 10:42:35 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/taco.bib", abstract = "We optimize a visual object detection application (that uses Vision Video Library kernels) and show that OpenCL is a unified programming paradigm that can provide high performance when running on the Ivy Bridge heterogeneous on-chip architecture. We evaluate different mapping techniques and show that running each kernel where it fits the best and using software pipelining can provide 1.91 times higher performance and 42\% better energy efficiency. We also show how to trade accuracy for energy at runtime. 
Overall, our application can perform accurate object detection at 40 frames per second (fps) in an energy-efficient manner.", acknowledgement = ack-nhfb, articleno = "45", fjournal = "ACM Transactions on Architecture and Code Optimization (TACO)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J924", } @Article{Vaidya:2013:SDO, author = "Aniruddha S. Vaidya and Anahita Shayesteh and Dong Hyuk Woo and Roy Saharoy and Mani Azimi", title = "{SIMD} divergence optimization through intra-warp compaction", journal = j-COMP-ARCH-NEWS, volume = "41", number = "3", pages = "368--379", month = jun, year = "2013", CODEN = "CANED2", DOI = "https://doi.org/10.1145/2508148.2485954", ISSN = "0163-5964 (print), 1943-5851 (electronic)", ISSN-L = "0163-5964", bibdate = "Sat Jul 27 06:58:55 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigarch.bib", note = "ISCA '13 conference proceedings.", abstract = "SIMD execution units in GPUs are increasingly used for high performance and energy efficient acceleration of general purpose applications. However, SIMD control flow divergence effects can result in reduced execution efficiency in a class of GPGPU applications, classified as divergent applications. Improving SIMD efficiency, therefore, has the potential to bring significant performance and energy benefits to a wide range of such data parallel applications. Recently, the SIMD divergence problem has received increased attention, and several micro-architectural techniques have been proposed to address various aspects of this problem. However, these techniques are often quite complex and, therefore, unlikely candidates for practical implementation. In this paper, we propose two micro-architectural optimizations for GPGPU architectures, which utilize relatively simple execution cycle compression techniques when certain groups of turned-off lanes exist in the instruction stream.
We refer to these optimizations as basic cycle compression (BCC) and swizzled-cycle compression (SCC), respectively. In this paper, we will outline the additional requirements for implementing these optimizations in the context of the studied GPGPU architecture. Our evaluations with divergent SIMD workloads from OpenCL (GPGPU) and OpenGL (graphics) applications show that BCC and SCC reduce execution cycles in divergent applications by as much as 42\% (20\% on average). For a subset of divergent workloads, the execution time is reduced by an average of 7\% for today's GPUs or by 18\% for future GPUs with a better provisioned memory subsystem. The key contribution of our work is in simplifying the micro-architecture for delivering divergence optimizations while providing the bulk of the benefits of more complex approaches.", acknowledgement = ack-nhfb, fjournal = "ACM SIGARCH Computer Architecture News", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J89", } @Article{Vogel:2013:BWC, author = "Thomas Vogel", title = "{{\booktitle{All the Way to CUDA}}} [Book review]", journal = j-COMPUT-SCI-ENG, volume = "15", number = "5", pages = "6--8", month = sep # "\slash " # oct, year = "2013", CODEN = "CSENFA", DOI = "https://doi.org/10.1109/MCSE.2013.101", ISSN = "1521-9615", ISSN-L = "1521-9615", bibdate = "Sat Apr 19 10:17:39 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/computscieng.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Computing in Science and Engineering", journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5992", } @Article{Wang:2013:PMO, author = "Cheng Wang and Sunita Chandrasekaran and Peng Sun and Barbara Chapman and Jim Holt", title = "Portable mapping of {openMP} to multicore embedded systems using {MCA APIs}", journal = j-SIGPLAN, volume = "48", number = "5", pages = "153--162", month = may, year = "2013", CODEN = "SINODQ", DOI = 
"https://doi.org/10.1145/2499369.2465569", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Jul 1 17:15:32 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Multicore embedded systems are being widely used in telecommunication systems, robotics, medical applications and more.While they offer a high-performance with low-power solution, programming in an efficient way is still a challenge. In order to exploit the capabilities that the hardware offers, software developers are expected to handle many of the low-level details of programming including utilizing DMA, ensuring cache coherency, and inserting synchronization primitives explicitly. The state-of-the-art involves solutions where the software toolchain is too vendor-specific thus tying the software to a particular hardware leaving no room-for portability. In this paper we present a runtime system to explore mapping a high-level programming model, OpenMP, on to multicore embedded systems. A key feature of our scheme is that unlike the existing approaches that largely rely on POSIX threads, our approach leverages the Multicore Association (MCA) APIs as an OpenMP translation layer. The MCA APIs is a set of low-level APIs handling resource management, inter-process communications and task scheduling for multicore embedded systems. By deploying the MCA APIs, our runtime is able to effectively capture the characteristics of multicore embedded systems compared with the POSIX threads. Furthermore, the MCA layer enables our runtime implementation to be portable across various architectures. Thus programmers only need to maintain a single OpenMP code base which is compatible by various compilers, while on the other hand, the code is portable across different possible types of platforms. We have evaluated our runtime system using several embedded benchmarks. 
The experiments demonstrate promising and competitive performance compared to the native approach for the platform.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "LCTES '13 conference proceedings.", } @Article{Wu:2013:PMH, author = "Xingfu Wu and Valerie Taylor", title = "Performance modeling of hybrid {MPI\slash OpenMP} scientific applications on large-scale multicore supercomputers", journal = j-J-COMP-SYS-SCI, volume = "79", number = "8", pages = "1256--1268", month = dec, year = "2013", CODEN = "JCSSBM", DOI = "https://doi.org/10.1016/j.jcss.2013.02.005", ISSN = "0022-0000 (print), 1090-2724 (electronic)", ISSN-L = "0022-0000", bibdate = "Tue Jan 29 15:27:23 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jcompsyssci.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0022000013000639", acknowledgement = ack-nhfb, fjournal = "Journal of Computer and System Sciences", journal-URL = "http://www.sciencedirect.com/science/journal/00220000", } @Article{Xu:2013:PMO, author = "Shiming Xu and Wei Xue and Hai Xiang Lin", title = "Performance modeling and optimization of sparse matrix-vector multiplication on {NVIDIA CUDA} platform", journal = j-J-SUPERCOMPUTING, volume = "63", number = "3", pages = "710--721", month = mar, year = "2013", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-011-0626-0", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Mon Apr 1 14:50:47 MDT 2013", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=63&issue=3; https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/article/10.1007/s11227-011-0626-0; http://link.springer.com/content/pdf/10.1007/s11227-011-0626-0", acknowledgement = ack-nhfb, fjournal = "The Journal of
Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Article{Yan:2013:SFS, author = "Shengen Yan and Guoping Long and Yunquan Zhang", title = "{StreamScan}: fast scan algorithms for {GPUs} without global barrier synchronization", journal = j-SIGPLAN, volume = "48", number = "8", pages = "229--238", month = aug, year = "2013", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2517327.2442539", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Aug 26 13:48:51 MDT 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", note = "PPoPP '13 Conference proceedings.", abstract = "Scan (also known as prefix sum) is a very useful primitive for various important parallel algorithms, such as sort, BFS, SpMV, compaction and so on. Current state of the art of GPU based scan implementation consists of three consecutive Reduce-Scan-Scan phases. This approach requires at least two global barriers and 3N (N is the problem size) global memory accesses. In this paper we propose StreamScan, a novel approach to implement scan on GPUs with only one computation phase. The main idea is to restrict synchronization to only adjacent workgroups, and thereby eliminating global barrier synchronization completely. The new approach requires only 2N global memory accesses and just one kernel invocation. On top of this we propose two important optimizations to further boost performance speedups, namely thread grouping to eliminate unnecessary local barriers, and register optimization to expand the on chip problem size. We designed an auto-tuning framework to search the parameter space automatically to generate highly optimized codes for both AMD and Nvidia GPUs. We implemented our technique with OpenCL. 
Compared with previous fast scan implementations, experimental results not only show promising performance speedups, but also reveal dramatic different optimization tradeoffs between Nvidia and AMD GPU platforms.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", } @Article{Yu:2013:AGA, author = "Zhibin Yu and Lieven Eeckhout and Nilanjan Goswami and Tao Li and Lizy John and Hai Jin and Chengzhong Xu", title = "Accelerating {GPGPU} architecture simulation", journal = j-SIGMETRICS, volume = "41", number = "1", pages = "331--332", month = jun, year = "2013", CODEN = "????", DOI = "https://doi.org/10.1145/2494232.2465540", ISSN = "0163-5999 (print), 1557-9484 (electronic)", ISSN-L = "0163-5999", bibdate = "Fri Feb 28 06:09:59 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigmetrics.bib", abstract = "Recently, graphics processing units (GPUs) have opened up new opportunities for speeding up general-purpose parallel applications due to their massive computational power and up to hundreds of thousands of threads enabled by programming models such as CUDA. However, due to the serial nature of existing micro-architecture simulators, these massively parallel architectures and workloads need to be simulated sequentially. As a result, simulating GPGPU architectures with typical benchmarks and input data sets is extremely time-consuming. This paper addresses the GPGPU architecture simulation challenge by generating miniature, yet representative GPGPU kernels. We first summarize the static characteristics of an existing GPGPU kernel in a profile, and analyze its dynamic behavior using the novel concept of the divergence flow statistics graph (DFSG). We subsequently use a GPGPU kernel synthesizing framework to generate a miniature proxy of the original kernel, which can reduce simulation time significantly. 
The key idea is to reduce the number of simulated instructions by decreasing per-thread iteration counts of loops. Our experimental results show that our approach can accelerate GPGPU architecture simulation by a factor of 88X on average and up to 589X with an average IPC relative error of 5.6\%.", acknowledgement = ack-nhfb, fjournal = "ACM SIGMETRICS Performance Evaluation Review", journal-URL = "http://portal.acm.org/toc.cfm?id=J618", } @Article{Zhang:2013:MPI, author = "Xiaohua Zhang and Sergio E. Wong and Felice C. Lightstone", title = "Message passing interface and multithreading hybrid for parallel molecular docking of large databases on petascale high performance computing machines", journal = j-J-COMPUT-CHEM, volume = "34", number = "11", pages = "915--927", day = "30", month = apr, year = "2013", CODEN = "JCCHDD", DOI = "https://doi.org/10.1002/jcc.23214", ISSN = "0192-8651 (print), 1096-987X (electronic)", ISSN-L = "0192-8651", bibdate = "Mon Apr 1 14:26:54 MDT 2013", bibsource = "http://www.interscience.wiley.com/jpages/0192-8651; https://www.math.utah.edu/pub/tex/bib/jcomputchem2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www3.interscience.wiley.com/journalfinder.html", acknowledgement = ack-nhfb, fjournal = "Journal of Computational Chemistry", journal-URL = "http://onlinelibrary.wiley.com/journal/10.1002/(ISSN)1096-987X", onlinedate = "23 Jan 2013", } @Article{Amritkar:2014:EPC, author = "Amit Amritkar and Surya Deb and Danesh Tafti", title = "Efficient parallel {CFD-DEM} simulations using {OpenMP}", journal = j-J-COMPUT-PHYS, volume = "256", number = "??", pages = "501--519", day = "1", month = jan, year = "2014", CODEN = "JCTPAH", ISSN = "0021-9991 (print), 1090-2716 (electronic)", ISSN-L = "0021-9991", bibdate = "Wed Nov 13 14:21:07 MST 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/jcomputphys2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = 
"http://www.sciencedirect.com/science/article/pii/S0021999113006128", acknowledgement = ack-nhfb, fjournal = "Journal of Computational Physics", journal-URL = "http://www.sciencedirect.com/science/journal/00219991", } @Article{Antonelli:2014:ATS, author = "Laura Antonelli and Stefania Corsaro and Zelda Marino and Mariarosaria Rizzardi", title = "Algorithm 944: {Talbot} Suite: Parallel Implementations of {Talbot}'s Method for the Numerical Inversion of {Laplace} Transforms", journal = j-TOMS, volume = "40", number = "4", pages = "29:1--29:18", month = jun, year = "2014", CODEN = "ACMSCU", DOI = "https://doi.org/10.1145/2616909", ISSN = "0098-3500 (print), 1557-7295 (electronic)", ISSN-L = "0098-3500", bibdate = "Wed Jul 2 18:28:58 MDT 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/toms.bib", abstract = "We present Talbot Suite, a C parallel software collection for the numerical inversion of Laplace Transforms, based on Talbot's method. It is designed to fit both single and multiple Laplace inversion problems, which arise in several application and research fields. In our software, we achieve high accuracy and efficiency, making full use of modern architectures and introducing two different levels of parallelism: coarse and fine grained parallelism. They offer a reasonable tradeoff between accuracy, the main aspect for a few inversions, and efficiency, the main aspect for multiple inversions. To take into account modern high-performance computing architectures, Talbot Suite provides different software versions: an OpenMP-based version for shared memory machines and a MPI-based version for distributed memory machines. Moreover, oriented to hybrid architectures, a combined MPI/OpenMP-based implementation is provided too. We describe our parallel algorithms and the software organization. We also report some performance results. 
Our software includes sample programs to call the Talbot Suite functions from C and from MATLAB.", acknowledgement = ack-nhfb, articleno = "29", fjournal = "ACM Transactions on Mathematical Software (TOMS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J782", } @Article{Awile:2014:PWF, author = "Omar Awile and Ivo F. Sbalzarini", title = "A {Pthreads} Wrapper for {Fortran 2003}", journal = j-TOMS, volume = "40", number = "3", pages = "19:1--19:15", month = apr, year = "2014", CODEN = "ACMSCU", DOI = "https://doi.org/10.1145/2558889", ISSN = "0098-3500 (print), 1557-7295 (electronic)", ISSN-L = "0098-3500", bibdate = "Mon Apr 21 17:42:14 MDT 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/fortran3.bib; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/toms.bib", abstract = "With the advent of multicore processors, numerical and mathematical software relies on parallelism in order to benefit from hardware performance increases. We present the design and use of a Fortran 2003 wrapper for POSIX threads, called forthreads. Forthreads is complete in the sense that it provides native Fortran 2003 interfaces to all pthreads routines where possible. We demonstrate the use and efficiency of forthreads for SIMD parallelism and task parallelism. We present forthreads/MPI implementations that enable hybrid shared-/distributed-memory parallelism in Fortran 2003. Our benchmarks show that forthreads offers performance comparable to that of OpenMP, but better thread control and more freedom. We demonstrate the latter by presenting a multithreaded Fortran 2003 library for POSIX Internet sockets, enabling interactive numerical simulations with runtime control.", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM Transactions on Mathematical Software (TOMS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J782", } @Article{Barrett:2014:EMM, author = "Brian W.
Barrett and Ron Brightwell and Ryan Grant and Simon D. Hammond and K. Scott Hemmert", title = "An evaluation of {MPI} message rate on hybrid-core processors", journal = j-IJHPCA, volume = "28", number = "4", pages = "415--424", month = nov, year = "2014", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342014552085", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Fri Feb 13 09:17:23 MST 2015", bibsource = "http://hpc.sagepub.com/content/28/4.toc; https://www.math.utah.edu/pub/tex/bib/ijsa.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://hpc.sagepub.com/content/28/4/415", acknowledgement = ack-nhfb, fjournal = "International Journal of High Performance Computing Applications", journal-URL = "http://hpc.sagepub.com/content/by/year", } @Article{Beaugnon:2014:VVO, author = "Ulysse Beaugnon and Alexey Kravets and Sven van Haastregt and Riyadh Baghdadi and David Tweed and Javed Absar and Anton Lokhmotov", title = "{VOBLA}: a vehicle for optimized basic linear algebra", journal = j-SIGPLAN, volume = "49", number = "5", pages = "115--124", month = may, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666357.2597818", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:37:30 MDT 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present VOBLA, a domain-specific language designed for programming linear algebra libraries. VOBLA is compiled to PENCIL, a domain independent intermediate language designed for efficient mapping to accelerator architectures such as GPGPUs. PENCIL is compiled to efficient, platform-specific OpenCL code using techniques based on the polyhedral model. This approach addresses both the programmer productivity and performance portability concerns associated with accelerator programming. 
We demonstrate our approach by using VOBLA to implement a BLAS library. We have evaluated the performance of OpenCL code generated using our compilation flow on ARM Mali, AMD Radeon, and AMD Opteron platforms. The generated code is currently on average 1.9x slower than highly hand-optimized OpenCL code, but on average 8.1x faster than straightforward OpenCL code. Given that the VOBLA coding takes significantly less effort compared to hand-optimizing OpenCL code, we believe our approach leads to improved productivity and performance portability.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "LCTES '14 conference proceedings.", } @Article{Bethune:2014:PAA, author = "Iain Bethune and J. Mark Bull and Nicholas J. Dingle and Nicholas J. Higham", title = "Performance analysis of asynchronous {Jacobi}'s method implemented in {MPI}, {SHMEM} and {OpenMP}", journal = j-IJHPCA, volume = "28", number = "1", pages = "97--111", month = feb, year = "2014", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342013493123", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Fri Mar 14 15:39:59 MDT 2014", bibsource = "http://hpc.sagepub.com/content/28/1.toc; https://www.math.utah.edu/pub/bibnet/authors/h/higham-nicholas-john.bib; https://www.math.utah.edu/pub/tex/bib/ijsa.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://hpc.sagepub.com/content/28/1/97.full.pdf+html", acknowledgement = ack-nhfb, fjournal = "International Journal of High Performance Computing Applications", journal-URL = "http://hpc.sagepub.com/content/by/year", onlinedate = "July 11, 2013", } @Article{Blas:2014:RAM, author = "Javier Garcia Blas and Jesus Carretero", title = "Recent advances in the {Message Passing Interface}", journal = j-IJHPCA, volume = "28", number = "4", pages = "387--389", month = nov, year = "2014", CODEN = "IHPCFL", DOI = 
"https://doi.org/10.1177/1094342014549273", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Fri Feb 13 09:17:23 MST 2015", bibsource = "http://hpc.sagepub.com/content/28/4.toc; https://www.math.utah.edu/pub/tex/bib/ijsa.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://hpc.sagepub.com/content/28/4/387", acknowledgement = ack-nhfb, fjournal = "International Journal of High Performance Computing Applications", journal-URL = "http://hpc.sagepub.com/content/by/year", } @Article{Coole:2014:FFH, author = "James Coole and Greg Stitt", title = "Fast, Flexible High-Level Synthesis from {OpenCL} using Reconfiguration Contexts", journal = j-IEEE-MICRO, volume = "34", number = "1", pages = "42--53", month = jan # "\slash " # feb, year = "2014", CODEN = "IEMIDZ", DOI = "https://doi.org/10.1109/MM.2013.108", ISSN = "0272-1732", ISSN-L = "0272-1732", bibdate = "Thu Aug 21 08:02:34 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeemicro.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "IEEE Micro", journal-URL = "http://www.computer.org/csdl/mags/mi/index.html", } @Article{Cores:2014:FAM, author = "Iv{\'a}n Cores and Gabriel Rodr{\'\i}guez and Patricia Gonz{\'a}lez and Mar{\'\i}a J. 
Mart{\'\i}n", title = "Failure Avoidance in {MPI} Applications Using an Application-Level Approach", journal = j-COMP-J, volume = "57", number = "1", pages = "100--114", month = jan, year = "2014", CODEN = "CMPJA6", DOI = "https://doi.org/10.1093/comjnl/bxs158", ISSN = "0010-4620 (print), 1460-2067 (electronic)", ISSN-L = "0010-4620", bibdate = "Mon Feb 3 17:02:40 MST 2014", bibsource = "http://comjnl.oxfordjournals.org/content/57/1.toc; https://www.math.utah.edu/pub/tex/bib/compj2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://comjnl.oxfordjournals.org/content/57/1/100.full.pdf+html", acknowledgement = ack-nhfb, fjournal = "Computer Journal", journal-URL = "http://comjnl.oxfordjournals.org/", onlinedate = "December 18, 2012", } @Article{Cores:2014:MAL, author = "Iv{\'a}n Cores and Gabriel Rodr{\'\i}guez and Mar{\'\i}a J. Mart{\'\i}n", title = "In-memory application-level checkpoint-based migration for {MPI} programs", journal = j-J-SUPERCOMPUTING, volume = "70", number = "2", pages = "660--670", month = nov, year = "2014", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-014-1120-2", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Fri Feb 13 12:32:19 MST 2015", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=70&issue=2; https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/article/10.1007/s11227-014-1120-2", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Article{Cunningham:2014:RXE, author = "David Cunningham and David Grove and Benjamin Herta and Arun Iyengar and Kiyokuni Kawachiya and Hiroki Murata and Vijay Saraswat and Mikio Takeuchi and Olivier Tardieu", title = "Resilient {X10}: efficient failure-aware programming", journal = j-SIGPLAN, volume = "49", number = "8", pages = "67--80", month = 
aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555248", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Scale-out programs run on multiple processes in a cluster. In scale-out systems, processes can fail. Computations using traditional libraries such as MPI fail when any component process fails. The advent of Map Reduce, Resilient Data Sets and MillWheel has shown dramatic improvements in productivity are possible when a high-level programming framework handles scale-out and resilience automatically. We are concerned with the development of general-purpose languages that support resilient programming. In this paper we show how the X10 language and implementation can be extended to support resilience. In Resilient X10, places may fail asynchronously, causing loss of the data and tasks at the failed place. Failure is exposed through exceptions. We identify a {\em Happens Before Invariance Principle} and require the runtime to automatically repair the global control structure of the program to maintain this principle. We show this reduces much of the burden of resilient programming. The programmer is only responsible for continuing execution with fewer computational resources and the loss of part of the heap, and can do so while taking advantage of domain knowledge. We build a complete implementation of the language, capable of executing benchmark applications on hundreds of nodes. We describe the algorithms required to make the language runtime resilient. We then give three applications, each with a different approach to fault tolerance (replay, decimation, and domain-level checkpointing). These can be executed at scale and survive node failure. 
We show that for these programs the overhead of resilience is a small fraction of overall runtime by comparing to equivalent non-resilient X10 programs. On one program we show end-to-end performance of Resilient X10 is $\sim$100x faster than Hadoop.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "PPoPP '14 conference proceedings.", } @Article{DAgostino:2014:CAM, author = "Daniele D'Agostino and Andrea Clematis and Sergio Decherchi and Walter Rocchia and Luciano Milanesi and Ivan Merelli", title = "{CUDA} accelerated molecular surface generation", journal = j-CCPE, volume = "26", number = "10", pages = "1819--1831", month = jul, year = "2014", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.3120", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Tue Sep 9 16:46:30 MDT 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "15 Aug 2013", } @Article{Didelot:2014:IMC, author = "Sylvain Didelot and Patrick Carribault and Marc P{\'e}rache and William Jalby", title = "Improving {MPI} communication overlap with collaborative polling", journal = j-COMPUTING, volume = "96", number = "4", pages = "263--278", month = apr, year = "2014", CODEN = "CMPTA2", DOI = "https://doi.org/10.1007/s00607-013-0327-z", ISSN = "0010-485X (print), 1436-5057 (electronic)", ISSN-L = "0010-485X", bibdate = "Fri Jun 6 10:07:21 MDT 2014", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0010-485X&volume=96&issue=4; https://www.math.utah.edu/pub/tex/bib/computing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/article/10.1007/s00607-013-0327-z", acknowledgement = ack-nhfb, fjournal
= "Computing", journal-URL = "http://link.springer.com/journal/607", } @Article{Dinan:2014:ECC, author = "James Dinan and Ryan E. Grant and Pavan Balaji and David Goodell and Douglas Miller and Marc Snir and Rajeev Thakur", title = "Enabling communication concurrency through flexible {MPI} endpoints", journal = j-IJHPCA, volume = "28", number = "4", pages = "390--405", month = nov, year = "2014", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342014548772", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Fri Feb 13 09:17:23 MST 2015", bibsource = "http://hpc.sagepub.com/content/28/4.toc; https://www.math.utah.edu/pub/tex/bib/ijsa.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://hpc.sagepub.com/content/28/4/390", acknowledgement = ack-nhfb, fjournal = "International Journal of High Performance Computing Applications", journal-URL = "http://hpc.sagepub.com/content/by/year", onlinedate = "September 23, 2014", } @Article{DiPierro:2014:PPP, author = "Massimo {Di Pierro}", title = "Portable Parallel Programs with {Python} and {OpenCL}", journal = j-COMPUT-SCI-ENG, volume = "16", number = "1", pages = "34--40", month = jan # "\slash " # feb, year = "2014", CODEN = "CSENFA", DOI = "https://doi.org/10.1109/MCSE.2013.99", ISSN = "1521-9615", ISSN-L = "1521-9615", bibdate = "Sat Apr 19 10:17:39 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/computscieng.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/python.bib", acknowledgement = ack-nhfb, fjournal = "Computing in Science and Engineering", journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5992", } @Article{Fang:2014:API, author = "Jianbin Fang and Henk Sips and Ana Lucia Varbanescu", title = "{Aristotle}: A performance impact indicator for the {OpenCL} kernels using local memory", journal = j-SCI-PROG, volume = "22", number = "3", pages = "239--257", month = "????", year = "2014", CODEN = 
"SCIPEV", DOI = "https://doi.org/10.3233/SPR-140390", ISSN = "1058-9244 (print), 1875-919X (electronic)", ISSN-L = "1058-9244", bibdate = "Tue Sep 9 18:01:15 MDT 2014", bibsource = "http://www.iospress.nl/; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sciprog.bib", acknowledgement = ack-nhfb, fjournal = "Scientific Programming", journal-URL = "http://iospress.metapress.com/content/1058-9244", } @InProceedings{Feng:2014:MSP, author = "Chunsheng Feng and Shi Shu and Jinchao Xu and Chen-Song Zhang", title = "A Multi-Stage Preconditioner for the Black Oil Model and Its {OpenMP} Implementation", crossref = "Erhel:2014:DDM", volume = "98", pages = "141--153", year = "2014", DOI = "https://doi.org/10.1007/978-3-319-05789-7_11", bibdate = "Sat Dec 12 10:22:13 MST 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncse.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/chapter/10.1007/978-3-319-05789-7_11/", acknowledgement = ack-nhfb, book-DOI = "https://doi.org/10.1007/978-3-319-05789-7", book-URL = "http://www.springerlink.com/content/978-3-319-05789-7", } @Article{Feng:2014:SBS, author = "Xiaowen Feng and Hai Jin and Ran Zheng and Zhiyuan Shao and Lei Zhu", title = "A segment-based sparse matrix--vector multiplication on {CUDA}", journal = j-CCPE, volume = "26", number = "1", pages = "271--286", month = jan, year = "2014", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.2978", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Sat Feb 8 15:45:08 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "7 Dec 2012", } @Article{Gerstenberger:2014:EHS, author = "Robert Gerstenberger and Maciej Besta and Torsten 
Hoefler", title = "Enabling highly-scalable remote memory access programming with {MPI-3 One Sided}", journal = j-SCI-PROG, volume = "22", number = "2", pages = "75--91", month = "????", year = "2014", CODEN = "SCIPEV", DOI = "https://doi.org/10.3233/SPR-140383", ISSN = "1058-9244 (print), 1875-919X (electronic)", ISSN-L = "1058-9244", bibdate = "Tue Sep 9 18:01:01 MDT 2014", bibsource = "http://www.iospress.nl/; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sciprog.bib", acknowledgement = ack-nhfb, fjournal = "Scientific Programming", journal-URL = "http://iospress.metapress.com/content/1058-9244", } @Article{Gonina:2014:SMC, author = "Ekaterina Gonina and Gerald Friedland and Eric Battenberg and Penporn Koanantakool and Michael Driscoll and Evangelos Georganas and Kurt Keutzer", title = "Scalable multimedia content analysis on parallel platforms using {Python}", journal = j-TOMCCAP, volume = "10", number = "2", pages = "18:1--18:??", month = feb, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2517151", ISSN = "1551-6857 (print), 1551-6865 (electronic)", ISSN-L = "1551-6857", bibdate = "Thu Mar 13 07:37:57 MDT 2014", bibsource = "http://www.acm.org/pubs/contents/journals/tomccap/; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/python.bib; https://www.math.utah.edu/pub/tex/bib/tomccap.bib", abstract = "In this new era dominated by consumer-produced media there is a high demand for web-scalable solutions to multimedia content analysis. A compelling approach to making applications scalable is to explicitly map their computation onto parallel platforms. However, developing efficient parallel implementations and fully utilizing the available resources remains a challenge due to the increased code complexity, limited portability and required low-level knowledge of the underlying hardware. 
In this article, we present PyCASP, a Python-based framework that automatically maps computation onto parallel platforms from Python application code to a variety of parallel platforms. PyCASP is designed using a systematic, pattern-oriented approach to offer a single software development environment for multimedia content analysis applications. Using PyCASP, applications can be prototyped in a couple hundred lines of Python code and automatically scale to modern parallel processors. Applications written with PyCASP are portable to a variety of parallel platforms and efficiently scale from a single desktop Graphics Processing Unit (GPU) to an entire cluster with a small change to application code. To illustrate our approach, we present three multimedia content analysis applications that use our framework: a state-of-the-art speaker diarization application, a content-based music recommendation system based on the Million Song Dataset, and a video event detection system for consumer-produced videos. We show that across this wide range of applications, our approach achieves the goal of automatic portability and scalability while at the same time allowing easy prototyping in a high-level language and efficient performance of low-level optimized code.", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Transactions on Multimedia Computing, Communications, and Applications", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J961", } @Article{Guerrero:2014:PCM, author = "Gin{\'e}s D. Guerrero and Richard M. Wallace and Jos{\'e} L. V{\'a}zquez-Poletti and Jos{\'e} M. Cecilia and Jos{\'e} M. 
Garc{\'\i}a and Daniel Mozos and Horacio P{\'e}rez-S{\'a}nchez", title = "A performance\slash cost model for a {CUDA} drug discovery application on physical and public cloud infrastructures", journal = j-CCPE, volume = "26", number = "10", pages = "1787--1798", month = jul, year = "2014", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.3117", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Tue Sep 9 16:46:30 MDT 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "14 Aug 2013", } @Article{Hall:2014:MMC, author = "Clifford Hall and Weixiao Ji and Estela Blaisten-Barojas", title = "The {Metropolis Monte Carlo} method with {CUDA} enabled {Graphic Processing Units}", journal = j-J-COMPUT-PHYS, volume = "258", number = "??", pages = "871--879", day = "1", month = feb, year = "2014", CODEN = "JCTPAH", ISSN = "0021-9991 (print), 1090-2716 (electronic)", ISSN-L = "0021-9991", bibdate = "Mon Dec 23 10:39:12 MST 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/jcomputphys2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0021999113007626", acknowledgement = ack-nhfb, fjournal = "Journal of Computational Physics", journal-URL = "http://www.sciencedirect.com/science/journal/00219991/", } @Book{Hanson:2014:NCM, author = "Richard J. 
Hanson and Tim Hopkins", title = "Numerical computing with modern {Fortran}", publisher = pub-SIAM, address = pub-SIAM:adr, pages = "xv + 244", year = "2014", ISBN = "1-61197-311-2 (paperback), 1-61197-312-0 (e-book)", ISBN-13 = "978-1-61197-311-2 (paperback), 978-1-61197-312-9 (e-book)", LCCN = "QA76.73.F25 H367 2013", bibdate = "Wed Mar 12 11:09:16 MDT 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/fortran3.bib; https://www.math.utah.edu/pub/tex/bib/numana2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; z3950.loc.gov:7090/Voyager", series = "Applied mathematics", abstract = "The Fortran language standard has undergone significant upgrades in recent years (1990, 1995, 2003, and 2008). \booktitle{Numerical Computing with Modern Fortran} illustrates many of these improvements through practical solutions to a number of scientific and engineering problems. Readers will discover: techniques for modernizing algorithms written in Fortran; examples of Fortran interoperating with C or C++ programs, plus using the IEEE floating-point standard for efficiency; illustrations of parallel Fortran programming using coarrays, MPI, and OpenMP; and a supplementary website with downloadable source codes discussed in the book.", acknowledgement = ack-nhfb, subject = "FORTRAN (Computer program language); Numerical analysis; Computer programs; Science; Mathematics", tableofcontents = "Introduction \\ The modern Fortran source \\ Modules for subprogram libraries \\ Generic subprograms \\ Sparse matrices, defined operations, overloaded assignment \\ Object-oriented programming for numerical applications \\ Recursion in Fortran \\ Case study: toward a modern QUADPACK routine \\ Case study: quadrature routine qag2003 \\ IEEE arithmetic features and exception handling \\ Interoperability with C \\ Defined operations for sparse matrix solutions \\ Case study: two sparse least-squares system examples \\ Message passing with MPI in standard Fortran \\ Coarrays in standard 
Fortran \\ OpenMP in Fortran \\ Modifying source to remove obsolescent or deleted features \\ Software testing \\ Compilers \\ Software tools \\ Fortran book code on SIAM web site \\ Bibliography \\ Index", } @InProceedings{Haynes:2014:MOA, author = "Ronald D. Haynes and Benjamin W. Ong", title = "{MPI--OpenMP} Algorithms for the Parallel Space-Time Solution of Time Dependent {PDEs}", crossref = "Erhel:2014:DDM", volume = "98", pages = "179--187", year = "2014", DOI = "https://doi.org/10.1007/978-3-319-05789-7_14", bibdate = "Sat Dec 12 10:22:13 MST 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncse.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/chapter/10.1007/978-3-319-05789-7_14/", acknowledgement = ack-nhfb, book-DOI = "https://doi.org/10.1007/978-3-319-05789-7", book-URL = "http://www.springerlink.com/content/978-3-319-05789-7", } @Article{Holmen:2014:ASI, author = "John K. Holmen and David L. Foster", title = "Accelerating Single Iteration Performance of {CUDA}--Based {$3$D} Reaction--Diffusion Simulations", journal = j-INT-J-PARALLEL-PROG, volume = "42", number = "2", pages = "343--363", month = apr, year = "2014", CODEN = "IJPPE5", DOI = "https://doi.org/10.1007/s10766-013-0251-z", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Thu Mar 13 19:25:13 MDT 2014", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=42&issue=2; https://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "See erratum \cite{Holmen:2014:EAS}.", URL = "http://link.springer.com/article/10.1007/s10766-013-0251-z", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", } @Article{Holmen:2014:EAS, author = "John K. Holmen and David L. 
Foster", title = "Erratum to: Accelerating Single Iteration Performance of {CUDA}--Based {$3$D} Reaction--Diffusion Simulations", journal = j-INT-J-PARALLEL-PROG, volume = "42", number = "2", pages = "364--364", month = apr, year = "2014", CODEN = "IJPPE5", DOI = "https://doi.org/10.1007/s10766-014-0305-x", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Thu Mar 13 19:25:13 MDT 2014", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=42&issue=2; https://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "See \cite{Holmen:2014:ASI}.", URL = "http://link.springer.com/content/pdf/10.1007/s10766-014-0305-x.pdf", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", } @Article{Jenkins:2014:PMD, author = "John Jenkins and James Dinan and Pavan Balaji and Tom Peterka and Nagiza F. 
Samatova and Rajeev Thakur", title = "Processing {MPI} Derived Datatypes on Noncontiguous {GPU}-Resident Data", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "25", number = "10", pages = "2627--2637", month = oct, year = "2014", CODEN = "ITDSEO", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Thu Feb 12 13:58:32 MST 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.computer.org/csdl/trans/td/2014/10/06600679-abs.html", abstract-URL = "http://www.computer.org/csdl/trans/td/2014/10/06600679-abs.html", acknowledgement = ack-nhfb, journal-URL = "http://www.computer.org/tpds/archives.htm", } @Article{Jie:2014:ASP, author = "Liang Jie and KenLi Li and Lin Shi and RangSu Liu and Jing Mei", title = "Accelerating solidification process simulation for large-sized system of liquid metal atoms using {GPU} with {CUDA}", journal = j-J-COMPUT-PHYS, volume = "257", number = "??", pages = "521--535", day = "15", month = jan, year = "2014", CODEN = "JCTPAH", ISSN = "0021-9991 (print), 1090-2716 (electronic)", ISSN-L = "0021-9991", bibdate = "Sat Nov 30 14:26:13 MST 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/jcomputphys2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0021999113006803", acknowledgement = ack-nhfb, fjournal = "Journal of Computational Physics", journal-URL = "http://www.sciencedirect.com/science/journal/00219991", } @Article{Joldes:2014:SSH, author = "Mioara Joldes and Valentina Popescu and Warwick Tucker", title = "Searching for Sinks for the {H{\'e}non} Map using a Multiple-precision {GPU} Arithmetic Library", journal = j-COMP-ARCH-NEWS, volume = "42", number = "4", pages = "63--68", month = sep, year = "2014", CODEN = "CANED2", DOI = "https://doi.org/10.1145/2693714.2693726", ISSN = "0163-5964 (print), 1943-5851 (electronic)", ISSN-L = "0163-5964", 
bibdate = "Wed Jun 3 11:27:35 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigarch.bib", abstract = "Today, GPUs represent an important hardware development platform for many problems in dynamical systems, where massive parallel computations are needed. Beside that, many numerical studies of chaotic dynamical systems require a computing precision higher than common floating point (FP) formats. One such application is locating invariant sets for chaotic dynamical systems. In particular, we focus on rigorously proving the existence of stable periodic orbits for the H{\'e}non map for parameter values close to the classical ones. For that, we present a multiple-precision floating-point arithmetic library in CUDA programming language for the NVIDIA GPU platform. Our library extends the precision using so-called FP expansions, where a number is represented as the unevaluated sum of standard machine precision FP numbers. This format offers the advantage of using directly available and highly optimized hardware FP operations. 
We generalize algorithms used by multiple-precisions libraries such as Bailey's QD, or the analogue GPU version, GQD.", acknowledgement = ack-nhfb, fjournal = "ACM SIGARCH Computer Architecture News", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J89", remark = "HEART '14 conference proceedings.", } @Article{Jung:2014:MCM, author = "Jaewoon Jung and Takaharu Mori and Yuji Sugita", title = "Midpoint cell method for hybrid {(MPI + OpenMP)} parallelization of molecular dynamics simulations", journal = j-J-COMPUT-CHEM, volume = "35", number = "14", pages = "1064--1072", day = "30", month = may, year = "2014", CODEN = "JCCHDD", DOI = "https://doi.org/10.1002/jcc.23591", ISSN = "0192-8651 (print), 1096-987X (electronic)", ISSN-L = "0192-8651", bibdate = "Wed Aug 27 06:34:07 MDT 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jcomputchem2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Journal of Computational Chemistry", journal-URL = "http://www.interscience.wiley.com/jpages/0192-8651", onlinedate = "23 Mar 2014", } @Article{Kamal:2014:IFG, author = "Humaira Kamal and Alan Wagner", title = "An integrated fine-grain runtime system for {MPI}", journal = j-COMPUTING, volume = "96", number = "4", pages = "293--309", month = apr, year = "2014", CODEN = "CMPTA2", DOI = "https://doi.org/10.1007/s00607-013-0329-x", ISSN = "0010-485X (print), 1436-5057 (electronic)", ISSN-L = "0010-485X", bibdate = "Fri Jun 6 10:07:21 MDT 2014", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0010-485X&volume=96&issue=4; https://www.math.utah.edu/pub/tex/bib/computing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/article/10.1007/s00607-013-0329-x", acknowledgement = ack-nhfb, fjournal = "Computing", journal-URL = "http://link.springer.com/journal/607", } @Article{Kim:2014:VVF, author = "Young-Joo Kim and Sejun Song and Yong-Kee Jun", title = "{VORD}: A Versatile 
On-the-fly Race Detection Tool in {OpenMP} Programs", journal = j-INT-J-PARALLEL-PROG, volume = "42", number = "6", pages = "900--930", month = dec, year = "2014", CODEN = "IJPPE5", DOI = "https://doi.org/10.1007/s10766-013-0257-6", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Wed Sep 10 07:13:09 MDT 2014", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=42&issue=6; https://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/article/10.1007/s10766-013-0257-6", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", } @Article{Komura:2014:CPG, author = "Yukihiro Komura and Yutaka Okabe", title = "{CUDA} programs for the {GPU} computing of the {Swendsen--Wang} multi-cluster spin flip algorithm: {$2$D} and {$3$D} {Ising}, {Potts}, and {$ X Y $} models", journal = j-COMP-PHYS-COMM, volume = "185", number = "3", pages = "1038--1043", month = mar, year = "2014", CODEN = "CPHCBZ", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Tue Feb 4 19:25:59 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465513003743", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655/", } @Article{Kumar:2014:OMC, author = "Sameer Kumar and Amith Mamidala and Philip Heidelberger and Dong Chen and Daniel Faraj", title = "Optimization of {MPI} collective operations on the {IBM Blue Gene/Q} supercomputer", journal = j-IJHPCA, volume = "28", number = "4", pages = "450--464", month = nov, year = "2014", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342014552086", ISSN = 
"1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Fri Feb 13 09:17:23 MST 2015", bibsource = "http://hpc.sagepub.com/content/28/4.toc; https://www.math.utah.edu/pub/tex/bib/ijsa.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://hpc.sagepub.com/content/28/4/450", acknowledgement = ack-nhfb, fjournal = "International Journal of High Performance Computing Applications", journal-URL = "http://hpc.sagepub.com/content/by/year", } @Article{Langr:2014:APP, author = "Daniel Langr and Pavel Tvrd{\'\i}k and Tom{\'a}s Dytrych and Jerry P. Draayer", title = "{Algorithm 947}: {Paraperm} --- Parallel Generation of Random Permutations with {MPI}", journal = j-TOMS, volume = "41", number = "1", pages = "5:1--5:26", month = oct, year = "2014", CODEN = "ACMSCU", DOI = "https://doi.org/10.1145/2669372", ISSN = "0098-3500 (print), 1557-7295 (electronic)", ISSN-L = "0098-3500", bibdate = "Mon Oct 27 16:37:25 MDT 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/prng.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/toms.bib", abstract = "An algorithm for parallel generation of a random permutation of a large set of distinct integers is presented. This algorithm is designed for massively parallel systems with distributed memory architectures and the MPI-based runtime environments. Scalability of the algorithm is analyzed according to the memory and communication requirements. An implementation of the algorithm in a form of a software library based on the C++ programming language and the MPI application programming interface is further provided. Finally, performed experiments are described and their results discussed. 
The biggest of these experiments resulted in a generation of a random permutation of $ 2^{41} $ integers in slightly more than four minutes using 131072 CPU cores.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on Mathematical Software (TOMS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J782", } @Article{LaSalle:2014:MBD, author = "Dominique LaSalle and George Karypis", title = "{MPI} for Big Data: New tricks for an old dog", journal = j-PARALLEL-COMPUTING, volume = "40", number = "10", pages = "754--767", month = dec, year = "2014", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Mon Nov 24 12:48:48 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819114000830", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191/", } @Article{Lee:2014:BCA, author = "Changmin Lee and Won Woo Ro and Jean-Luc Gaudiot", title = "Boosting {CUDA} Applications with {CPU--GPU} Hybrid Computing", journal = j-INT-J-PARALLEL-PROG, volume = "42", number = "2", pages = "384--404", month = apr, year = "2014", CODEN = "IJPPE5", DOI = "https://doi.org/10.1007/s10766-013-0252-y", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Thu Mar 13 19:25:13 MDT 2014", bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=42&issue=2; https://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/article/10.1007/s10766-013-0252-y", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", } @Article{Losada:2014:EAL, author = "N. Losada and M. J. Mart{\'\i}n and G. 
Rodr{\'\i}guez and P. Gonz{\'a}lez", title = "Extending an Application-Level Checkpointing Tool to Provide Fault Tolerance Support to {OpenMP} Applications", journal = j-J-UCS, volume = "20", number = "9", pages = "1351--??", month = "????", year = "2014", CODEN = "????", ISSN = "0948-695X (print), 0948-6968 (electronic)", ISSN-L = "0948-6968", bibdate = "Fri Feb 13 11:25:50 MST 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jucs.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.jucs.org/jucs_20_9/extending_an_application_level", acknowledgement = ack-nhfb, fjournal = "J.UCS: Journal of Universal Computer Science", journal-URL = "http://www.jucs.org/jucs", } @Article{Luo:2014:ISM, author = "Miao Luo and Xiaoyi Lu and Khaled Hamidouche and Krishna Kandalla and Dhabaleswar K. Panda", title = "Initial study of multi-endpoint runtime for {MPI + OpenMP} hybrid programming model on multi-core systems", journal = j-SIGPLAN, volume = "49", number = "8", pages = "395--396", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2555287", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "State-of-the-art MPI libraries rely on locks to guarantee thread-safety. This discourages application developers from using multiple threads to perform MPI operations. 
In this paper, we propose a high performance, lock-free multi-endpoint MPI runtime, which can achieve up to 40\% improvement for point-to-point operation and one representative collective operation with minimum or no modifications to the existing applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "PPoPP '14 conference proceedings.", } @Article{Mitra:2014:AAP, author = "Subrata Mitra and Ignacio Laguna and Dong H. Ahn and Saurabh Bagchi and Martin Schulz and Todd Gamblin", title = "Accurate application progress analysis for large-scale parallel debugging", journal = j-SIGPLAN, volume = "49", number = "6", pages = "193--203", month = jun, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2666356.2594336", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Sep 26 07:38:28 MDT 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Debugging large-scale parallel applications is challenging. In most HPC applications, parallel tasks progress in a coordinated fashion, and thus a fault in one task can quickly propagate to other tasks, making it difficult to debug. Finding the least-progressed tasks can significantly reduce the effort to identify the task where the fault originated. However, existing approaches for detecting them suffer low accuracy and large overheads; either they use imprecise static analysis or are unable to infer progress dependence inside loops. We present a loop-aware progress-dependence analysis tool, Prodometer, which determines relative progress among parallel tasks via dynamic analysis. Our fault-injection experiments suggest that its accuracy and precision are over 90\% for most cases and that it scales well up to 16,384 MPI tasks. 
Further, our case study shows that it significantly helped diagnosing a perplexing error in MPI, which only manifested at large scale.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "PLDI '14 conference proceedings.", } @Article{Molero-Armenta:2014:OOI, author = "M. Molero-Armenta and Ursula Iturrar{\'a}n-Viveros and S. Aparicio and M. G. Hern{\'a}ndez", title = "Optimized {OpenCL} implementation of the {Elastodynamic Finite Integration Technique} for viscoelastic media", journal = j-COMP-PHYS-COMM, volume = "185", number = "10", pages = "2683--2696", month = oct, year = "2014", CODEN = "CPHCBZ", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Sat Aug 16 08:37:41 MDT 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465514001702", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655/", } @Article{Morishima:2014:PEG, author = "Shin Morishima and Hiroki Matsutani", title = "Performance Evaluations of Graph Database using {CUDA} and {OpenMP} Compatible Libraries", journal = j-COMP-ARCH-NEWS, volume = "42", number = "4", pages = "75--80", month = sep, year = "2014", CODEN = "CANED2", DOI = "https://doi.org/10.1145/2693714.2693728", ISSN = "0163-5964 (print), 1943-5851 (electronic)", ISSN-L = "0163-5964", bibdate = "Wed Dec 3 16:18:50 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigarch.bib", abstract = "Graph databases use graph structures to store data sets as nodes, edges, and properties.
They are used to store and search the relationships between a large number of nodes, such as social networking services and recommendation engines that use customer social graphs. Since computation cost for graph search queries increases as the graph becomes large, in this paper we accelerate the graph search functions (Dijkstra and A* algorithms) of a graph database Neo4j using two ways: multithreaded library and CUDA library for graphics processing units (GPUs). We use 100,000-node graphs generated based on a degree distribution of Facebook social graph for evaluations. Our multi-threaded and GPU-based implementations require an auxiliary adjacency matrix for a target graph. The results show that, when we do not take into account additional overhead to generate the auxiliary adjacency matrix, multi-threaded version improves the Dijkstra and A* search performance by 16.2x and 13.8x compared to the original implementation. The GPU-based implementation improves the Dijkstra and A* search performance by 26.2x and 32.8x. 
When we take into account the overhead, although the speed-ups by our implementations are reduced, by reusing the auxiliary adjacency matrix for multiple graph search queries we can significantly improve the graph search performance.", acknowledgement = ack-nhfb, fjournal = "ACM SIGARCH Computer Architecture News", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J89", remark = "HEART '14 conference proceedings.", } @Article{Nomura:2014:PAM, author = "Shimpei Nomura and Takuji Mitsuishi and Jun Suzuki and Yuki Hayashi and Masaki Kan and Hideharu Amano", title = "Performance Analysis of the Multi-{GPU} System with {ExpEther}", journal = j-COMP-ARCH-NEWS, volume = "42", number = "4", pages = "9--14", month = sep, year = "2014", CODEN = "CANED2", DOI = "https://doi.org/10.1145/2693714.2693717", ISSN = "0163-5964 (print), 1943-5851 (electronic)", ISSN-L = "0163-5964", bibdate = "Wed Jun 3 11:27:35 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigarch.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", abstract = "A GPU cluster in which each node provides a few GPUs connected with PCIe (PCI Express) is commonly used for acceleration of a large application program requiring the performance beyond a single GPU. However, in such a system, programmers are required to describe two parallel programming between nodes in MPIs or other message passing library as well as the fine grained parallel programming for intra-GPUs. As a cost effective alternative of such clusters, we propose a novel multi-GPU system with ExpEther, a virtualization technique which extends PCIe of a host CPU to Ethernet. All devices connected by ExpEther can be treated as if they were directly connected to the host. 
Evaluation with two application programs with and without GPU-GPU communication revealed that the proposed system with four GPUs achieved 3.88 and 3.29 times performance improvement respectively compared with a single GPU system. Compared with GPU cluster system in which each node provides a GPU, the proposed system achieved about 7\% and 30\% performance improvement, respectively.", acknowledgement = ack-nhfb, fjournal = "ACM SIGARCH Computer Architecture News", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J89", remark = "HEART '14 conference proceedings.", } @Article{Olukotun:2014:BPP, author = "Kunle Olukotun", title = "Beyond parallel programming with domain specific languages", journal = j-SIGPLAN, volume = "49", number = "8", pages = "179--180", month = aug, year = "2014", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2692916.2557966", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Nov 26 16:26:30 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Today, almost all computer architectures are parallel and heterogeneous; a combination of multiple CPUs, GPUs and specialized processors. This creates a challenging problem for application developers who want to develop high performance programs without the effort required to use low-level, architecture specific parallel programming models (e.g., OpenMP for CMPs, CUDA for GPUs, MPI for clusters). Domain-specific languages (DSLs) are a promising solution to this problem because they can provide an avenue for high-level application-specific abstractions with implicit parallelism to be mapped directly to low level architecture-specific programming models; providing both high programmer productivity and high execution performance. 
In this talk I will describe an approach to building high performance DSLs, which is based on DSL embedding in a general purpose programming language, metaprogramming and a DSL infrastructure called Delite. I will describe how we transform DSL programs into efficient first-order low-level code using domain specific optimization, parallelism and locality optimization with parallel patterns, and architecture-specific code generation. All optimizations and transformations are implemented in Delite: an extensible DSL compiler infrastructure that significantly reduces the effort required to develop new DSLs. Delite DSLs for machine learning, data querying, graph analysis, and scientific computing all achieve performance competitive with manually parallelized C++ code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "PPoPP '14 conference proceedings.", } @Article{Pal:2014:PMH, author = "Anirban Pal and Abhishek Agarwala and Soumyendu Raha and Baidurya Bhattacharya", title = "Performance metrics in a hybrid {MPI--OpenMP} based molecular dynamics simulation with short-range interactions", journal = j-J-PAR-DIST-COMP, volume = "74", number = "3", pages = "2203--2214", month = mar, year = "2014", CODEN = "JPDCER", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Tue Jan 28 12:39:53 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0743731513002505", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315/", } @Article{Panda:2014:GAM, author = "Dhabaleswar K.
Panda", title = "{GPU}-Aware {MPI} on {RDMA}-Enabled Clusters: Design, Implementation and Evaluation", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "25", number = "10", pages = "2595--2605", month = oct, year = "2014", CODEN = "ITDSEO", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Thu Feb 12 13:58:32 MST 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.computer.org/csdl/trans/td/2014/10/06587715-abs.html", abstract-URL = "http://www.computer.org/csdl/trans/td/2014/10/06587715-abs.html", acknowledgement = ack-nhfb, journal-URL = "http://www.computer.org/tpds/archives.htm", } @Article{Pawliczek:2014:VED, author = "Piotr Pawliczek and Witold Dzwinel and David A. Yuen", title = "Visual exploration of data by using multidimensional scaling on multicore {CPU}, {GPU}, and {MPI} cluster", journal = j-CCPE, volume = "26", number = "3", pages = "662--682", day = "10", month = mar, year = "2014", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.3027", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Thu Feb 27 14:51:21 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "30 Apr 2013", } @Article{Pena:2014:CEC, author = "Antonio J. Pe{\~n}a and Carlos Rea{\~n}o and Federico Silla and Rafael Mayo and Enrique S. 
Quintana-Ort{\'\i} and Jos{\'e} Duato", title = "A complete and efficient {CUDA}-sharing solution for {HPC} clusters", journal = j-PARALLEL-COMPUTING, volume = "40", number = "10", pages = "574--588", month = dec, year = "2014", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Mon Nov 24 12:48:48 MST 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819114001227", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191/", } @Article{Peng:2014:BAH, author = "Yuanxi Peng and Manuel Salda{\~n}a and Christopher A. Madill and Xiaofeng Zou and Paul Chow", title = "Benefits of Adding Hardware Support for Broadcast and Reduce Operations in {MPSoC} Applications", journal = j-TRETS, volume = "7", number = "3", pages = "17:1--17:??", month = aug, year = "2014", CODEN = "????", DOI = "https://doi.org/10.1145/2629470", ISSN = "1936-7406 (print), 1936-7414 (electronic)", ISSN-L = "1936-7406", bibdate = "Mon Sep 1 10:42:23 MDT 2014", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/trets.bib", abstract = "MPI has been used as a parallel programming model for supercomputers and clusters and recently in MultiProcessor Systems-on-Chip (MPSoC). One component of MPI is collective communication and its performance is key for certain parallel applications to achieve good speedups. Previous work showed that, with synthetic communication-only benchmarks, communication improvements of up to 11.4-fold and 22-fold for broadcast and reduce operations, respectively, can be achieved by providing hardware support at the network level in a Network-on-Chip (NoC). 
However, these numbers do not provide a good estimation of the advantage for actual applications, as there are other factors that affect performance besides communications, such as computation. To this end, we extend our previous work by evaluating the impact of hardware support over a set of five parallel application kernels of varying computation-to-communication ratios. By introducing some useful computation to the performance evaluation, we obtain more representative results of the benefits of adding hardware support for broadcast and reduce operations. The experiments show that applications with lower computation-to-communication ratios benefit the most from hardware support as they highly depend on efficient collective communications to achieve better scalability. We also extend our work by doing more analysis on clock frequency, resource usage, power, and energy. The results show reasonable scalability for resource utilization and power in the network interfaces as the number of channels increases and that, even though more power is dissipated in the network interfaces due to the added hardware, the total energy used can still be less if the actual speedup is sufficient. 
The application kernels are executed in a 24-embedded-processor system distributed across four FPGAs.", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Transactions on Reconfigurable Technology and Systems (TRETS)", journal-URL = "http://portal.acm.org/toc.cfm?id=J1151", }

@Article{Peng:2014:IDI,
  author =       "Yi Peng and Li Chen and Jun-Hai Yong",
  title =        "Importance-Driven Isosurface Decimation for Visualization of Large Simulation Data Based on {OpenCL}",
  journal =      j-COMPUT-SCI-ENG,
  volume =       "16",
  number =       "1",
  pages =        "24--32",
  month =        jan # "\slash " # feb,
  year =         "2014",
  CODEN =        "CSENFA",
  DOI =          "https://doi.org/10.1109/MCSE.2013.45",
  ISSN =         "1521-9615",
  ISSN-L =       "1521-9615",
  bibdate =      "Sat Apr 19 10:17:39 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/computscieng.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Computing in Science and Engineering",
  journal-URL =  "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5992",
}

@Article{Priimak:2014:FDN,
  author =       "Dmitri Priimak",
  title =        "Finite difference numerical method for the superlattice {Boltzmann} transport equation and case comparison of {CPU(C)} and {GPU(CUDA)} implementations",
  journal =      j-J-COMPUT-PHYS,
  volume =       "278",
  number =       "??",
  pages =        "182--192",
  day =          "1",
  month =        dec,
  year =         "2014",
  CODEN =        "JCTPAH",
  DOI =          "https://doi.org/10.1016/j.jcp.2014.08.028",
  ISSN =         "0021-9991 (print), 1090-2716 (electronic)",
  ISSN-L =       "0021-9991",
  bibdate =      "Tue Sep 23 17:27:17 MDT 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jcomputphys2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0021999114005828",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Computational Physics",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00219991/",
}

@Article{Rodrigues:2014:TPS,
  author =       "Christopher Rodrigues and Thomas Jablin and Abdul Dakkak and Wen-Mei Hwu",
  title =        "{Triolet}: a programming system that unifies algorithmic skeleton interfaces for high-performance cluster computing",
  journal =      j-SIGPLAN,
  volume =       "49",
  number =       "8",
  pages =        "247--258",
  month =        aug,
  year =         "2014",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2692916.2555268",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Wed Nov 26 16:26:30 MST 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "Functional algorithmic skeletons promise a high-level programming interface for distributed-memory clusters that free developers from concerns of task decomposition, scheduling, and communication.
                 Unfortunately, prior distributed functional skeleton frameworks do not deliver performance comparable to that achievable in a low-level distributed programming model such as C with MPI and OpenMP, even when used in concert with high-performance array libraries.
                 There are several causes: they do not take advantage of shared memory on each cluster node; they impose a fixed partitioning strategy on input data; and they have limited ability to fuse loops involving skeletons that produce a variable number of outputs per input.
                 We address these shortcomings in the Triolet programming language through a modular library design that separates concerns of parallelism, loop nesting, and data partitioning.
                 We show how Triolet substantially improves the parallel performance of algorithms involving array traversals and nested, variable-size loops over what is achievable in Eden, a distributed variant of Haskell.
We further demonstrate how Triolet can substantially simplify parallel programming relative to C with MPI and OpenMP while achieving 23--100\% of its performance on a 128-core cluster.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "PPoPP '14 conference proceedings.",
}

@Article{Saillard:2014:PCS,
  author =       "Emmanuelle Saillard and Patrick Carribault and Denis Barthou",
  title =        "{PARCOACH}: Combining static and dynamic validation of {MPI} collective communications",
  journal =      j-IJHPCA,
  volume =       "28",
  number =       "4",
  pages =        "425--434",
  month =        nov,
  year =         "2014",
  CODEN =        "IHPCFL",
  DOI =          "https://doi.org/10.1177/1094342014552204",
  ISSN =         "1094-3420 (print), 1741-2846 (electronic)",
  ISSN-L =       "1094-3420",
  bibdate =      "Fri Feb 13 09:17:23 MST 2015",
  bibsource =    "http://hpc.sagepub.com/content/28/4.toc;
                 https://www.math.utah.edu/pub/tex/bib/ijsa.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://hpc.sagepub.com/content/28/4/425",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of High Performance Computing Applications",
  journal-URL =  "http://hpc.sagepub.com/content/by/year",
  onlinedate =   "September 26, 2014",
}

@Article{Samadi:2014:LGU,
  author =       "Mehrzad Samadi and Amir Hormati and Janghaeng Lee and Scott Mahlke",
  title =        "Leveraging {GPUs} using cooperative loop speculation",
  journal =      j-TACO,
  volume =       "11",
  number =       "1",
  pages =        "3:1--3:??",
  month =        feb,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2579617",
  ISSN =         "1544-3566 (print), 1544-3973 (electronic)",
  ISSN-L =       "1544-3566",
  bibdate =      "Fri Mar 14 17:30:52 MDT 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/taco.bib",
  abstract =     "Graphics processing units, or GPUs, provide TFLOPs of additional performance potential in commodity computer systems that frequently go unused by most applications.
                 Even with the emergence of languages such as CUDA and OpenCL, programming GPUs remains a difficult challenge for a variety of reasons, including the inherent algorithmic characteristics and data structure choices used by applications as well as the tedious performance optimization cycle that is necessary to achieve high performance.
                 The goal of this work is to increase the applicability of GPUs beyond CUDA/OpenCL to implicitly data-parallel applications written in C/C++ using speculative parallelization.
                 To achieve this goal, we propose Paragon: a static/dynamic compiler platform to speculatively run possibly data-parallel portions of sequential applications on the GPU while cooperating with the system CPU.
                 For such loops, Paragon utilizes the GPU in an opportunistic way while orchestrating a cooperative relation between the CPU and GPU to reduce the overhead of miss-speculations.
                 Paragon monitors the dependencies for the loops running speculatively on the GPU and nonspeculatively on the CPU using a lightweight distributed conflict detection designed specifically for GPUs, and transfers the execution to the CPU in case a conflict is detected.
                 Paragon resumes the execution on the GPU after the CPU resolves the dependency.
                 Our experiments show that Paragon achieves 4x on average and up to 30x speedup compared to unsafe CPU execution with four threads and 7x on average and up to 64x speedup versus sequential execution across a set of sequential but implicitly data-parallel applications.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on Architecture and Code Optimization (TACO)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J924",
}

@Article{Samadi:2014:PPB,
  author =       "Mehrzad Samadi and Davoud Anoushe Jamshidi and Janghaeng Lee and Scott Mahlke",
  title =        "{Paraprox}: pattern-based approximation for data parallel applications",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "42",
  number =       "1",
  pages =        "35--50",
  month =        mar,
  year =         "2014",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2654822.2541948",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Mon Aug 18 17:12:47 MDT 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Approximate computing is an approach where reduced accuracy of results is traded off for increased speed, throughput, or both.
                 Loss of accuracy is not permissible in all computing domains, but there are a growing number of data-intensive domains where the output of programs need not be perfectly correct to provide useful results or even noticeable differences to the end user.
                 These soft domains include multimedia processing, machine learning, and data mining/analysis.
                 An important challenge with approximate computing is transparency to insulate both software and hardware developers from the time, cost, and difficulty of using approximation.
                 This paper proposes a software-only system, Paraprox, for realizing transparent approximation of data-parallel programs that operates on commodity hardware systems.
Paraprox starts with a data-parallel kernel implemented using OpenCL or CUDA and creates a parameterized approximate kernel that is tuned at runtime to maximize performance subject to a target output quality (TOQ) that is supplied by the user.
                 Approximate kernels are created by recognizing common computation idioms found in data-parallel programs (e.g., Map, Scatter/Gather, Reduction, Scan, Stencil, and Partition) and substituting approximate implementations in their place.
                 Across a set of 13 soft data-parallel applications with at most 10\% quality degradation, Paraprox yields an average performance gain of 2.7x on a NVIDIA GTX 560 GPU and 2.5x on an Intel Core i7 quad-core processor compared to accurate execution on each platform.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J89",
  remark =       "ASPLOS '14 conference proceedings.",
}

@Article{Samadi:2014:SPS,
  author =       "Mehrzad Samadi and Janghaeng Lee and D. Anoushe Jamshidi and Scott Mahlke and Amir Hormati",
  title =        "Scaling Performance via Self-Tuning Approximation for Graphics Engines",
  journal =      j-TOCS,
  volume =       "32",
  number =       "3",
  pages =        "7:1--7:??",
  month =        sep,
  year =         "2014",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/2631913",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Wed Jan 21 07:18:28 MST 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  abstract =     "Approximate computing, where computation accuracy is traded off for better performance or higher data throughput, is one solution that can help data processing keep pace with the current and growing abundance of information.
                 For particular domains, such as multimedia and learning algorithms, approximation is commonly used today.
We consider automation to be essential to provide transparent approximation, and we show that larger benefits can be achieved by constructing the approximation techniques to fit the underlying hardware.
                 Our target platform is the GPU because of its high performance capabilities and difficult programming challenges that can be alleviated with proper automation.
                 Our approach --- SAGE --- combines a static compiler that automatically generates a set of CUDA kernels with varying levels of approximation with a runtime system that iteratively selects among the available kernels to achieve speedup while adhering to a target output quality set by the user.
                 The SAGE compiler employs three optimization techniques to generate approximate kernels that exploit the GPU microarchitecture: selective discarding of atomic operations, data packing, and thread fusion.
                 Across a set of machine learning and image processing kernels, SAGE's approximation yields an average of 2.5$ \times $ speedup with less than 10\% quality loss compared to the accurate execution on a NVIDIA GTX 560 GPU.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Transactions on Computer Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}

@Article{Sani:2014:PDF,
  author =       "Ardalan Amiri Sani and Kevin Boos and Shaopu Qin and Lin Zhong",
  title =        "{I/O} paravirtualization at the device file boundary",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "42",
  number =       "1",
  pages =        "319--332",
  month =        mar,
  year =         "2014",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2654822.2541943",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Mon Aug 18 17:12:47 MDT 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Paravirtualization is an important I/O virtualization technology since it uniquely provides all of the following benefits: the ability to share the device between multiple VMs,
support for legacy devices without virtualization hardware, and high performance.
                 However, existing paravirtualization solutions have one main limitation: they only support one I/O device class, and would require significant engineering effort to support new device classes and features.
                 In this paper, we present Paradice, a solution that vastly simplifies I/O paravirtualization by using a common paravirtualization boundary for various I/O device classes: Unix device files.
                 Using this boundary, the paravirtual drivers simply act as a class-agnostic indirection layer between the application and the actual device driver.
                 We address two fundamental challenges: supporting cross-VM driver memory operations without changes to applications or device drivers and providing fault and device data isolation between guest VMs despite device driver bugs.
                 We implement Paradice for x86, the Xen hypervisor, and the Linux and FreeBSD OSes.
                 Our implementation paravirtualizes various GPUs, input devices, cameras, an audio device, and an Ethernet card for the netmap framework with $ \approx 7700 $ LoC, of which only $ \approx 900 $ are device class-specific.
                 Our measurements show that Paradice achieves performance close to native for different devices and applications including netmap, 3D HD games, and OpenCL applications.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J89",
  remark =       "ASPLOS '14 conference proceedings.",
}

@Article{Song:2014:DAT,
  author =       "Sukhyun Song and Jeffrey K. Hollingsworth",
  title =        "Designing and auto-tuning parallel {$3$-D FFT} for computation-communication overlap",
  journal =      j-SIGPLAN,
  volume =       "49",
  number =       "8",
  pages =        "181--192",
  month =        aug,
  year =         "2014",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2692916.2555249",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Wed Nov 26 16:26:30 MST 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "This paper presents a method to design and auto-tune a new parallel 3-D FFT code using the non-blocking MPI all-to-all operation.
                 We achieve high performance by optimizing computation-communication overlap.
                 Our code performs fully asynchronous communication without any support from special hardware.
                 We also improve cache performance through loop tiling.
                 To cope with the complex trade-off regarding our optimization techniques, we parameterize our code and auto-tune the parameters efficiently in a large parameter space.
Experimental results from two systems confirm that our code achieves a speedup of up to 1.76x over the FFTW library.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "PPoPP '14 conference proceedings.",
}

@Article{Steinberger:2014:WTB,
  author =       "Markus Steinberger and Michael Kenzel and Pedro Boechat and Bernhard Kerbl and Mark Dokter and Dieter Schmalstieg",
  title =        "{Whippletree}: task-based scheduling of dynamic workloads on the {GPU}",
  journal =      j-TOG,
  volume =       "33",
  number =       "6",
  pages =        "228:1--228:??",
  month =        nov,
  year =         "2014",
  CODEN =        "ATGRDF",
  DOI =          "https://doi.org/10.1145/2661229.2661250",
  ISSN =         "0730-0301 (print), 1557-7368 (electronic)",
  ISSN-L =       "0730-0301",
  bibdate =      "Fri Nov 14 19:16:26 MST 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tog/;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/tog.bib",
  abstract =     "In this paper, we present Whippletree, a novel approach to scheduling dynamic, irregular workloads on the GPU.
                 We introduce a new programming model which offers the simplicity and expressiveness of task-based parallelism while retaining all aspects of the multi-level execution hierarchy essential to unlocking the full potential of a modern GPU.
                 At the same time, our programming model lends itself to efficient implementation on the SIMD-based architecture typical of a current GPU.
                 We demonstrate the practical utility of our model by providing a reference implementation on top of current CUDA hardware.
                 Furthermore, we show that our model compares favorably to traditional approaches in terms of both performance as well as the range of applications that can be covered.
We demonstrate the benefits of our model for recursive Reyes rendering, procedural geometry generation and volume rendering with concurrent irradiance caching.",
  acknowledgement = ack-nhfb,
  articleno =    "228",
  fjournal =     "ACM Transactions on Graphics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J778",
}

@Article{Steuwer:2014:SHL,
  author =       "Michel Steuwer and Sergei Gorlatch",
  title =        "{SkelCL}: a high-level extension of {OpenCL} for {multi-GPU} systems",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "69",
  number =       "1",
  pages =        "25--33",
  month =        jul,
  year =         "2014",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-014-1213-y",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Wed Sep 10 06:45:05 MDT 2014",
  bibsource =    "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=69&issue=1;
                 https://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/article/10.1007/s11227-014-1213-y",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
}

@Article{Symeonidou:2014:DRB,
  author =       "Christi Symeonidou and Polyvios Pratikakis and Dimitrios S.
Nikolopoulos and Angelos Bilas",
  title =        "Distributed region-based memory allocation and synchronization",
  journal =      j-IJHPCA,
  volume =       "28",
  number =       "4",
  pages =        "406--414",
  month =        nov,
  year =         "2014",
  CODEN =        "IHPCFL",
  DOI =          "https://doi.org/10.1177/1094342014552863",
  ISSN =         "1094-3420 (print), 1741-2846 (electronic)",
  ISSN-L =       "1094-3420",
  bibdate =      "Fri Feb 13 09:17:23 MST 2015",
  bibsource =    "http://hpc.sagepub.com/content/28/4.toc;
                 https://www.math.utah.edu/pub/tex/bib/ijsa.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://hpc.sagepub.com/content/28/4/406",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of High Performance Computing Applications",
  journal-URL =  "http://hpc.sagepub.com/content/by/year",
}

@Article{Teixido:2014:MBI,
  author =       "Ivan Teixid{\'o} and Francesc Seb{\'e} and Josep Conde and Francesc Solsona",
  title =        "{MPI}-based implementation of an enhanced algorithm to solve the {LPN} problem in a memory-constrained environment",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "40",
  number =       "5--6",
  pages =        "100--112",
  month =        may,
  year =         "2014",
  CODEN =        "PACOEJ",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Fri May 30 18:33:51 MDT 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819114000453",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191/",
}

@Article{Thompson:2014:CIC,
  author =       "Elizabeth A. Thompson and Timothy R.
Anderson",
  title =        "A {CUDA} implementation of the {Continuous Space Language Model}",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "68",
  number =       "1",
  pages =        "65--86",
  month =        apr,
  year =         "2014",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-013-1023-7",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Wed Sep 10 06:44:53 MDT 2014",
  bibsource =    "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=68&issue=1;
                 https://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/article/10.1007/s11227-013-1023-7",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
}

@Article{Tien:2014:EOS,
  author =       "Tsan-Rong Tien and Yi-Ping You",
  title =        "Enabling {OpenCL} support for {GPGPU} in {Kernel-based Virtual Machine}",
  journal =      j-SPE,
  volume =       "44",
  number =       "5",
  pages =        "483--510",
  month =        may,
  year =         "2014",
  CODEN =        "SPEXBL",
  DOI =          "https://doi.org/10.1002/spe.2166",
  ISSN =         "0038-0644 (print), 1097-024X (electronic)",
  ISSN-L =       "0038-0644",
  bibdate =      "Wed Sep 10 05:57:32 MDT 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/spe.bib;
                 https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Software --- Practice and Experience",
  journal-URL =  "http://onlinelibrary.wiley.com/journal/10.1002/(ISSN)1097-024X",
  onlinedate =   "22 Nov 2012",
}

@Article{Traff:2014:SPE,
  author =       "Jesper Larsson Tr{\"a}ff and Siegfried Benkner",
  title =        "Selected Papers from {EuroMPI 2012}",
  journal =      j-COMPUTING,
  volume =       "96",
  number =       "4",
  pages =        "259--261",
  month =        apr,
  year =         "2014",
  CODEN =        "CMPTA2",
  DOI =          "https://doi.org/10.1007/s00607-013-0335-z",
  ISSN =         "0010-485X (print), 1436-5057 (electronic)",
  ISSN-L =       "0010-485X",
  bibdate =      "Fri Jun 6 10:07:21 MDT 2014",
  bibsource =    "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0010-485X&volume=96&issue=4;
                 https://www.math.utah.edu/pub/tex/bib/computing.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/article/10.1007/s00607-013-0335-z",
  acknowledgement = ack-nhfb,
  fjournal =     "Computing",
  journal-URL =  "http://link.springer.com/journal/607",
}

@Article{Vikas:2014:MGA,
  author =       "Vikas and Nasser Giacaman and Oliver Sinnen",
  title =        "Multiprocessing with {GUI}-awareness using {OpenMP}-like directives in {Java}",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "40",
  number =       "2",
  pages =        "69--89",
  month =        feb,
  year =         "2014",
  CODEN =        "PACOEJ",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Fri Feb 28 06:47:16 MST 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819113001439",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191/",
}

@Article{Wang:2014:IPD,
  author =       "Zheng Wang and Georgios Tournavitis and Bj{\"o}rn Franke and Michael F. P. O'Boyle",
  title =        "Integrating profile-driven parallelism detection and machine-learning-based mapping",
  journal =      j-TACO,
  volume =       "11",
  number =       "1",
  pages =        "2:1--2:??",
  month =        feb,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2579561",
  ISSN =         "1544-3566 (print), 1544-3973 (electronic)",
  ISSN-L =       "1544-3566",
  bibdate =      "Fri Mar 14 17:30:52 MDT 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/taco.bib",
  abstract =     "Compiler-based auto-parallelization is a much-studied area but has yet to find widespread application.
                 This is largely due to the poor identification and exploitation of application parallelism, resulting in disappointing performance far below that which a skilled expert programmer could achieve.
                 We have identified two weaknesses in traditional parallelizing compilers and propose a novel, integrated approach resulting in significant performance improvements of the generated parallel code.
                 Using profile-driven parallelism detection, we overcome the limitations of static analysis, enabling the identification of more application parallelism, and only rely on the user for final approval.
                 We then replace the traditional target-specific and inflexible mapping heuristics with a machine-learning-based prediction mechanism, resulting in better mapping decisions while automating adaptation to different target architectures.
                 We have evaluated our parallelization strategy on the NAS and SPEC CPU2000 benchmarks and two different multicore platforms (dual quad-core Intel Xeon SMP and dual-socket QS20 Cell blade).
                 We demonstrate that our approach not only yields significant improvements when compared with state-of-the-art parallelizing compilers but also comes close to and sometimes exceeds the performance of manually parallelized codes.
                 On average, our methodology achieves 96\% of the performance of the hand-tuned OpenMP NAS and SPEC parallel benchmarks on the Intel Xeon platform and gains a significant speedup for the IBM Cell platform, demonstrating the potential of profile-guided and machine-learning-based parallelization for complex multicore platforms.",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "ACM Transactions on Architecture and Code Optimization (TACO)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J924",
}

@Article{Wu:2014:MAG,
  author =       "Xing Wu and Frank Mueller and Scott Pakin",
  title =        "A methodology for automatic generation of executable communication specifications from parallel {MPI} applications",
  journal =      j-TOPC,
  volume =       "1",
  number =       "1",
  pages =        "6:1--6:??",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2660249",
  ISSN =         "2329-4949 (print), 2329-4957 (electronic)",
  ISSN-L =       "2329-4949",
  bibdate =      "Fri Oct 17 12:28:03 MDT 2014",
  bibsource =    "http://topc.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/topc.bib",
  abstract =     "Portable parallel benchmarks are widely used for performance evaluation of HPC systems.
                 However, because these are manually produced, they generally represent a greatly simplified view of application behavior, missing the subtle but important-to-performance nuances that may exist in a complete application.
                 This work contributes novel methods to automatically generate highly portable and customizable communication benchmarks from HPC applications.
                 We utilize ScalaTrace, a lossless yet scalable parallel-application tracing framework to collect selected aspects of the run-time behavior of HPC applications, including communication operations and computation time, while abstracting away the details of the computation proper.
                 We subsequently generate benchmarks with nearly identical run-time behavior to the original applications.
Results demonstrate that the generated benchmarks are in fact able to preserve the run-time behavior (including both the communication pattern and the execution time) of the original applications.
                 Such automated benchmark generation is without precedent and particularly valuable for proprietary, export-controlled, or classified application codes.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on Parallel Computing",
  journal-URL =  "http://dl.acm.org/citation.cfm?id=2632163",
}

@Article{Wu:2014:OFB,
  author =       "Jing Wu and Joseph JaJa and Elias Balaras",
  title =        "An Optimized {FFT}-Based Direct {Poisson} Solver on {CUDA GPUs}",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "25",
  number =       "3",
  pages =        "550--559",
  month =        mar,
  year =         "2014",
  CODEN =        "ITDSEO",
  DOI =          "https://doi.org/10.1109/TPDS.2013.53",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Mon Aug 25 07:12:16 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed Systems",
  journal-URL =  "http://www.computer.org/tpds/archives.htm",
}

@Article{Yan:2014:OMB,
  author =       "Xin Yan and Xiaohua Shi and Lina Wang and Haiyan Yang",
  title =        "An {OpenCL} micro-benchmark suite for {GPUs} and {CPUs}",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "69",
  number =       "2",
  pages =        "693--713",
  month =        aug,
  year =         "2014",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-014-1112-2",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Wed Sep 10 06:45:09 MDT 2014",
  bibsource =    "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=69&issue=2;
                 https://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/article/10.1007/s11227-014-1112-2",
  acknowledgement = ack-nhfb,
  fjournal =     "The
Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
}

@Article{Yang:2014:CNR,
  author =       "Yi Yang and Huiyang Zhou",
  title =        "{CUDA-NP}: realizing nested thread-level parallelism in {GPGPU} applications",
  journal =      j-SIGPLAN,
  volume =       "49",
  number =       "8",
  pages =        "93--106",
  month =        aug,
  year =         "2014",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2692916.2555254",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Wed Nov 26 16:26:30 MST 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "Parallel programs consist of series of code sections with different thread-level parallelism (TLP).
                 As a result, it is rather common that a thread in a parallel program, such as a GPU kernel in CUDA programs, still contains both sequential code and parallel loops.
                 In order to leverage such parallel loops, the latest Nvidia Kepler architecture introduces dynamic parallelism, which allows a GPU thread to start another GPU kernel, thereby reducing the overhead of launching kernels from a CPU.
                 However, with dynamic parallelism, a parent thread can only communicate with its child threads through global memory and the overhead of launching GPU kernels is non-trivial even within GPUs.
                 In this paper, we first study a set of GPGPU benchmarks that contain parallel loops, and highlight that these benchmarks do not have a very high loop count or high degrees of TLP.
                 Consequently, the benefits of leveraging such parallel loops using dynamic parallelism are too limited to offset its overhead.
                 We then present our proposed solution to exploit nested parallelism in CUDA, referred to as CUDA-NP.
                 With CUDA-NP, we initially enable a high number of threads when a GPU program starts, and use control flow to activate different numbers of threads for different code sections.
                 We implemented our proposed CUDA-NP framework using a directive-based compiler approach.
                 For a GPU kernel, an application developer only needs to add OpenMP-like pragmas for parallelizable code sections.
                 Then, our CUDA-NP compiler automatically generates the optimized GPU kernels.
                 It supports both the reduction and the scan primitives, explores different ways to distribute parallel loop iterations into threads, and efficiently manages on-chip resource.
                 Our experiments show that for a set of GPGPU benchmarks, which have already been optimized and contain nested parallelism, our proposed CUDA-NP framework further improves the performance by up to 6.69 times and 2.18 times on average.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "PPoPP '14 conference proceedings.",
}

@Article{Yang:2014:HPD,
  author =       "Luobin Yang and Steve C. Chiu and Wei-Keng Liao",
  title =        "High performance data clustering: a comparative analysis of performance for {GPU}, {RASC}, {MPI}, and {OpenMP} implementations",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "70",
  number =       "1",
  pages =        "284--300",
  month =        oct,
  year =         "2014",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-013-0906-y",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Fri Feb 13 12:32:14 MST 2015",
  bibsource =    "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=70&issue=1;
                 https://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/article/10.1007/s11227-013-0906-y",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
}

@Article{Yang:2014:IMP,
  author =       "Xu Yang and Deyuan Guo and Hu He and Haijing Tang and Yanjun Zhang",
  title =        "An Implementation of {Message-Passing Interface} over {VxWorks} for Real-Time Embedded Multi-Core
Systems",
  journal =      j-COMP-J,
  volume =       "57",
  number =       "11",
  pages =        "1756--1764",
  month =        nov,
  year =         "2014",
  CODEN =        "CMPJA6",
  DOI =          "https://doi.org/10.1093/comjnl/bxt152",
  ISSN =         "0010-4620 (print), 1460-2067 (electronic)",
  ISSN-L =       "0010-4620",
  bibdate =      "Mon Oct 27 08:54:43 MDT 2014",
  bibsource =    "http://comjnl.oxfordjournals.org/content/57/11.toc;
                 https://www.math.utah.edu/pub/tex/bib/compj2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://comjnl.oxfordjournals.org/content/57/11/1756",
  acknowledgement = ack-nhfb,
  fjournal =     "Computer Journal",
  journal-URL =  "http://comjnl.oxfordjournals.org/",
  onlinedate =   "January 3, 2014",
}

@Article{Yang:2014:PMI,
  author =       "Chaoran Yang and Wesley Bland and John Mellor-Crummey and Pavan Balaji",
  title =        "Portable, {MPI}-interoperable {Coarray Fortran}",
  journal =      j-SIGPLAN,
  volume =       "49",
  number =       "8",
  pages =        "81--92",
  month =        aug,
  year =         "2014",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2692916.2555270",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Wed Nov 26 16:26:30 MST 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/fortran3.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "The past decade has seen the advent of a number of parallel programming models such as Coarray Fortran (CAF), Unified Parallel C, X10, and Chapel.
                 Despite the productivity gains promised by these models, most parallel scientific applications still rely on MPI as their data movement model.
                 One reason for this trend is that it is hard for users to incrementally adopt these new programming models in existing MPI applications.
                 Because each model use its own runtime system, they duplicate resources and are potentially error-prone.
                 Such independent runtime systems were deemed necessary because MPI was considered insufficient in the past to play this role for these languages.
The recently released MPI-3, however, adds several new capabilities that now provide all of the functionality needed to act as a runtime, including a much more comprehensive one-sided communication framework. In this paper, we investigate how MPI-3 can form a runtime system for one example programming model, CAF, with a broader goal of enabling a single application to use both MPI and CAF with the highest level of interoperability.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "PPoPP '14 conference proceedings.", } @Article{Zheng:2014:IMS, author = "Liang Zheng and Huai Zhang and Taras Gerya and Matthew Knepley and David A. Yuen and Yaolin Shi", title = "Implementation of a multigrid solver on a {GPU} for {Stokes} equations with strongly variable viscosity based on {Matlab} and {CUDA}", journal = j-IJHPCA, volume = "28", number = "1", pages = "50--60", month = feb, year = "2014", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342013478640", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Fri Mar 14 15:39:59 MDT 2014", bibsource = "http://hpc.sagepub.com/content/28/1.toc; https://www.math.utah.edu/pub/tex/bib/ijsa.bib; https://www.math.utah.edu/pub/tex/bib/matlab.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://hpc.sagepub.com/content/28/1/50.full.pdf+html", acknowledgement = ack-nhfb, fjournal = "International Journal of High Performance Computing Applications", journal-URL = "http://hpc.sagepub.com/content/by/year", onlinedate = "March 5, 2013", } @Article{Zounmevo:2014:ESC, author = "Judicael A. 
Zounmevo and Dries Kimpe and Robert Ross and Ahmad Afsahi", title = "Extreme-scale computing services over {MPI}: Experiences, observations and features proposal for next-generation message passing interface", journal = j-IJHPCA, volume = "28", number = "4", pages = "435--449", month = nov, year = "2014", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342014548864", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Fri Feb 13 09:17:23 MST 2015", bibsource = "http://hpc.sagepub.com/content/28/4.toc; https://www.math.utah.edu/pub/tex/bib/ijsa.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://hpc.sagepub.com/content/28/4/435", acknowledgement = ack-nhfb, fjournal = "International Journal of High Performance Computing Applications", journal-URL = "http://hpc.sagepub.com/content/by/year", onlinedate = "September 10, 2014", } @Article{Zounmevo:2014:FRC, author = "Judicael A. Zounmevo and Ahmad Afsahi", title = "A fast and resource-conscious {MPI} message queue mechanism for large-scale jobs", journal = j-FUT-GEN-COMP-SYS, volume = "30", number = "??", pages = "265--290", month = jan, year = "2014", CODEN = "FGSEVI", ISSN = "0167-739X (print), 1872-7115 (electronic)", ISSN-L = "0167-739X", bibdate = "Mon Dec 2 16:57:46 MST 2013", bibsource = "https://www.math.utah.edu/pub/tex/bib/futgencompsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.sciencedirect.com/science/journal/0167739X", URL = "http://www.sciencedirect.com/science/article/pii/S0167739X13001489", acknowledgement = ack-nhfb, fjournal = "Future Generation Computer Systems", journal-URL = "http://www.sciencedirect.com/science/journal/0167739X", } @Article{Agosta:2015:OPP, author = "Giovanni Agosta and Alessandro Barenghi and Alessandro {Di Federico} and Gerardo Pelosi", title = "{OpenCL} performance portability for general-purpose computation on graphics processor units: an exploration on cryptographic primitives", journal = j-CCPE, volume 
= "27", number = "14", pages = "3633--3660", day = "25", month = sep, year = "2015", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.3358", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Mon Sep 28 09:32:54 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "29 Aug 2014", } @Article{Al-Mouhamed:2015:EAO, author = "Mayez Al-Mouhamed and Ayaz ul Hassan Khan", title = "Exploration of automatic optimisation for {CUDA} programming", journal = j-INT-J-PAR-EMER-DIST-SYS, volume = "30", number = "4", pages = "309--324", year = "2015", CODEN = "????", DOI = "https://doi.org/10.1080/17445760.2014.953158", ISSN = "1744-5760 (print), 1744-5779 (electronic)", ISSN-L = "1744-5760", bibdate = "Tue Sep 15 07:34:54 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/intjparemerdistsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.tandfonline.com/toc/gpaa20/30/4", URL = "http://www.tandfonline.com/doi/abs/10.1080/17445760.2014.953158", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel, Emergent and Distributed Systems: IJPEDS", journal-URL = "http://www.tandfonline.com/loi/gpaa20", } @Article{Amer:2015:MRC, author = "Abdelhalim Amer and Huiwei Lu and Yanjie Wei and Pavan Balaji and Satoshi Matsuoka", title = "{MPI+Threads}: runtime contention and remedies", journal = j-SIGPLAN, volume = "50", number = "8", pages = "239--248", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688522", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = 
"https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Hybrid MPI+Threads programming has emerged as an alternative model to the ``MPI everywhere'' model to better handle the increasing core density in cluster nodes. While the MPI standard allows multithreaded concurrent communication, such flexibility comes with the cost of maintaining thread safety within the MPI implementation, typically implemented using critical sections. In contrast to previous works that studied the importance of critical-section granularity in MPI implementations, in this paper we investigate the implication of critical-section arbitration on communication performance. We first analyze the MPI runtime when multithreaded concurrent communication takes place on hierarchical memory systems. Our results indicate that the mutex-based approach that most MPI implementations use today can incur performance penalties due to unfair arbitration. We then present methods to mitigate these penalties with a first-come, first-served arbitration and a priority locking scheme that favors threads doing useful work. 
Through evaluations using several benchmarks and applications, we demonstrate up to 5-fold improvement in performance.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "PPoPP '15 conference proceedings.", } @Article{Balasubramanian:2015:EGL, author = "Raghuraman Balasubramanian and Vinay Gangadhar and Ziliang Guo and Chen-Han Ho and Cherin Joseph and Jaikrishnan Menon and Mario Paulo Drumond and Robin Paul and Sharath Prasad and Pradip Valathol and Karthikeyan Sankaralingam", title = "Enabling {GPGPU} Low-Level Hardware Explorations with {MIAOW}: an Open-Source {RTL} Implementation of a {GPGPU}", journal = j-TACO, volume = "12", number = "2", pages = "21:1--21:??", month = jul, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2764908", ISSN = "1544-3566 (print), 1544-3973 (electronic)", ISSN-L = "1544-3566", bibdate = "Fri Aug 7 09:46:00 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/taco.bib", abstract = "Graphic processing unit (GPU)-based general-purpose computing is developing as a viable alternative to CPU-based computing in many domains. Today's tools for GPU analysis include simulators like GPGPU-Sim, Multi2Sim, and Barra. While useful for modeling first-order effects, these tools do not provide a detailed view of GPU microarchitecture and physical design. Further, as GPGPU research evolves, design ideas and modifications demand detailed estimates of impact on overall area and power. Fueled by this need, we introduce MIAOW (Many-core Integrated Accelerator Of Wisconsin), an open-source RTL implementation of the AMD Southern Islands GPGPU ISA, capable of running unmodified OpenCL-based applications. We present our design motivated by our goals to create a realistic, flexible, OpenCL-compatible GPGPU, capable of emulating a full system. 
We first explore if MIAOW is realistic and then use four case studies to show that MIAOW enables the following: physical design perspective to ``traditional'' microarchitecture, new types of research exploration, and validation/calibration of simulator-based characterization of hardware. The findings and ideas are contributions in their own right, in addition to MIAOW's utility as a tool for others' research.", acknowledgement = ack-nhfb, articleno = "21", fjournal = "ACM Transactions on Architecture and Code Optimization (TACO)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J924", } @Article{Betts:2015:DIV, author = "Adam Betts and Nathan Chong and Alastair F. Donaldson and Jeroen Ketema and Shaz Qadeer and Paul Thomson and John Wickerson", title = "The Design and Implementation of a Verification Technique for {GPU} Kernels", journal = j-TOPLAS, volume = "37", number = "3", pages = "10:1--10:??", month = jun, year = "2015", CODEN = "ATPSDT", DOI = "https://doi.org/10.1145/2743017", ISSN = "0164-0925 (print), 1558-4593 (electronic)", ISSN-L = "0164-0925", bibdate = "Fri Jun 19 05:36:55 MDT 2015", bibsource = "http://www.acm.org/pubs/contents/journals/toplas/; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/toplas.bib", abstract = "We present a technique for the formal verification of GPU kernels, addressing two classes of correctness properties: data races and barrier divergence. Our approach is founded on a novel formal operational semantics for GPU kernels termed {\em synchronous, delayed visibility (SDV)\/} semantics, which captures the execution of a GPU kernel by multiple groups of threads. The SDV semantics provides operational definitions for barrier divergence and for both inter- and intra-group data races. We build on the semantics to develop a method for reducing the task of verifying a massively parallel GPU kernel to that of verifying a sequential program. 
This completely avoids the need to reason about thread interleavings, and allows existing techniques for sequential program verification to be leveraged. We describe an efficient encoding of data race detection and propose a method for automatically inferring the loop invariants that are required for verification. We have implemented these techniques as a practical verification tool, GPUVerify, that can be applied directly to OpenCL and CUDA source code. We evaluate GPUVerify with respect to a set of 162 kernels drawn from public and commercial sources. Our evaluation demonstrates that GPUVerify is capable of efficient, automatic verification of a large number of real-world kernels.", acknowledgement = ack-nhfb, articleno = "10", fjournal = "ACM Transactions on Programming Languages and Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J783", } @Article{Bukata:2015:SRC, author = "Libor Bukata and Premysl Sucha and Zdenek Hanz{\'a}lek", title = "Solving the Resource Constrained Project Scheduling Problem using the parallel Tabu Search designed for the {CUDA} platform", journal = j-J-PAR-DIST-COMP, volume = "77", number = "??", pages = "58--68", month = mar, year = "2015", CODEN = "JPDCER", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Mon Mar 2 12:05:20 MST 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0743731514002226", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315/", } @Article{Busa:2015:CCO, author = "J{\'a}n {Busa, Jr.} and J{\'a}n Busa and Shura Hayryan and Chin-Kun Hu and Ming-Chya Wu", title = "{CAVE-CL}: an {OpenCL} version of the package for detection and quantitative analysis of internal cavities in a system of overlapping balls: Application to proteins", 
journal = j-COMP-PHYS-COMM, volume = "190", number = "??", pages = "224--227", month = may, year = "2015", CODEN = "CPHCBZ", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Wed Mar 4 08:31:43 MST 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465514004378", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655/", } @Article{Carretero:2015:AMM, author = "Jesus Carretero and Javier Garcia-Blas and David E. Singh and Florin Isaila and Alexey Lastovetsky and Thomas Fahringer and Radu Prodan and Peter Zangerl and Christi Symeonidou and Afshin Fassihi and Horacio P{\'e}rez-S{\'a}nchez", title = "Acceleration of {MPI} mechanisms for sustainable {HPC} applications", journal = j-SUPERFRI, volume = "2", number = "2", pages = "28--45", month = "????", year = "2015", CODEN = "????", ISSN = "2409-6008 (print), 2313-8734 (electronic)", bibdate = "Sat Nov 11 07:15:27 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/superfri.bib", URL = "http://superfri.org/superfri/article/view/35", acknowledgement = ack-nhfb, fjournal = "Supercomputing Frontiers and Innovations", journal-URL = "http://superfri.org/superfri/issue/archive", } @Article{Casanova:2015:SMA, author = "Henri Casanova and Fr{\'e}d{\'e}ric Desprez and George S. 
Markomanolis and Fr{\'e}d{\'e}ric Suter", title = "Simulation of {MPI} applications with time-independent traces", journal = j-CCPE, volume = "27", number = "5", pages = "1145--1168", day = "10", month = apr, year = "2015", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.3278", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Sat Jul 25 19:54:07 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "11 Apr 2014", } @Article{Casanova:2015:TMS, author = "Henri Casanova and Anshul Gupta and Fr{\'e}d{\'e}ric Suter", title = "Toward More Scalable Off-Line Simulations of {MPI} Applications", journal = j-PARALLEL-PROCESS-LETT, volume = "25", number = "3", pages = "1541002", month = sep, year = "2015", CODEN = "PPLTEE", DOI = "https://doi.org/10.1142/S0129626415410029", ISSN = "0129-6264 (print), 1793-642X (electronic)", ISSN-L = "0129-6264", bibdate = "Tue May 29 09:05:25 MDT 2018", bibsource = "http://ejournals.wspc.com.sg/ppl/; https://www.math.utah.edu/pub/tex/bib/parallelprocesslett.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Parallel Processing Letters", journal-URL = "http://www.worldscientific.com/loi/ppl", } @Article{Cercos-Pita:2015:ANF, author = "J. L. 
Cercos-Pita", title = "{AQUAgpusph}, a new free {$3$D} {SPH} solver accelerated with {OpenCL}", journal = j-COMP-PHYS-COMM, volume = "192", number = "??", pages = "295--312", month = jul, year = "2015", CODEN = "CPHCBZ", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Tue Apr 21 11:56:04 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465515000909", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655/", } @Article{Chabbi:2015:BEP, author = "Milind Chabbi and Wim Lavrijsen and Wibe de Jong and Koushik Sen and John Mellor-Crummey and Costin Iancu", title = "Barrier elision for production parallel programs", journal = j-SIGPLAN, volume = "50", number = "8", pages = "109--119", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688502", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Large scientific code bases are often composed of several layers of runtime libraries, implemented in multiple programming languages. In such situation, programmers often choose conservative synchronization patterns leading to suboptimal performance. In this paper, we present context-sensitive dynamic optimizations that elide barriers redundant during the program execution. In our technique, we perform data race detection alongside the program to identify redundant barriers in their calling contexts; after an initial learning, we start eliding all future instances of barriers occurring in the same calling context. 
We present an automatic on-the-fly optimization and a multi-pass guided optimization. We apply our techniques to NWChem---a 6 million line computational chemistry code written in C/C++/Fortran that uses several runtime libraries such as Global Arrays, ComEx, DMAPP, and MPI. Our technique elides a surprisingly high fraction of barriers (as many as 63\%) in production runs. This redundancy elimination translates to application speedups as high as 14\% on 2048 cores. Our techniques also provided valuable insight about the application behavior, later used by NWChem developers. Overall, we demonstrate the value of holistic context-sensitive analyses that consider the domain science in conjunction with the associated runtime software stack.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "PPoPP '15 conference proceedings.", } @Article{Cho:2015:OAO, author = "Myeongjin Cho and Youngsun Han and Minseong Kim and Seon Wook Kim", title = "{O2WebCL}: an automatic {OpenCL-to-WebCL} translator for high performance web computing", journal = j-J-SUPERCOMPUTING, volume = "71", number = "6", pages = "2050--2065", month = jun, year = "2015", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-014-1260-4", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Sat Aug 8 12:23:11 MDT 2015", bibsource = "http://link.springer.com/journal/11227/71/6; https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/article/10.1007/s11227-014-1260-4", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Article{Couder-Castaneda:2015:PCM, author = "C. Couder-Casta{\~n}eda and H. Barrios-Pi{\~n}a and I. Gitler and M. 
Arroyo", title = "Performance of a Code Migration for the Simulation of Supersonic Ejector Flow to {SMP}, {MIC}, and {GPU} Using {OpenMP}, {OpenMP+LEO}, and {OpenACC} Directives", journal = j-SCI-PROG, volume = "2015", number = "??", pages = "739107:1--739107:20", month = "????", year = "2015", CODEN = "SCIPEV", DOI = "https://doi.org/10.1155/2015/739107", ISSN = "1058-9244 (print), 1875-919X (electronic)", ISSN-L = "1058-9244", bibdate = "Tue Sep 20 07:53:44 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sciprogram.bib", URL = "https://www.hindawi.com/journals/sp/2015/739107/", acknowledgement = ack-nhfb, fjournal = "Scientific Programming", journal-URL = "https://www.hindawi.com/journals/sp/", journalabr = "Sci. Prog", } @Article{Ebrahimirad:2015:EAS, author = "Vahid Ebrahimirad and Maziar Goudarzi and Aboozar Rajabi", title = "Energy-Aware Scheduling for Precedence-Constrained Parallel Virtual Machines in Virtualized Data Centers", journal = j-J-GRID-COMP, volume = "13", number = "2", pages = "233--253", month = jun, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1007/s10723-015-9327-x", ISSN = "1570-7873 (print), 1572-9184 (electronic)", ISSN-L = "1570-7873", bibdate = "Sat Aug 8 12:08:29 MDT 2015", bibsource = "http://link.springer.com/journal/10723/13/2; https://www.math.utah.edu/pub/tex/bib/jgridcomp.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", URL = "http://link.springer.com/article/10.1007/s10723-015-9327-x", acknowledgement = ack-nhfb, fjournal = "Journal of Grid Computing", journal-URL = "http://link.springer.com/journal/10723", } @Article{Emani:2015:CDM, author = "Murali Krishna Emani and Michael O'Boyle", title = "Celebrating diversity: a mixture of experts approach for runtime mapping in dynamic environments", journal = j-SIGPLAN, volume = "50", number = "6", pages = "499--508", month = jun, year = "2015", CODEN = 
"SINODQ", DOI = "https://doi.org/10.1145/2813885.2737999", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Matching program parallelism to platform parallelism using thread selection is difficult when the environment and available resources dynamically change. Existing compiler or runtime approaches are typically based on a one-size fits all policy. There is little ability to either evaluate or adapt the policy when encountering new external workloads or hardware resources. This paper focuses on selecting the best number of threads for a parallel application in dynamic environments. It develops a new scheme based on a mixture of experts approach. It learns online which, of a number of existing policies, or experts, is best suited for a particular environment without having to try out each policy. It does this by using a novel environment predictor as a proxy for the quality of an expert thread selection policy. Additional expert policies can easily be added and are selected only when appropriate. We evaluate our scheme in environments with varying external workloads and hardware resources. We then consider the case when workloads use affinity scheduling or are themselves adaptive and show that our approach, in all cases, outperforms existing schemes and surprisingly improves workload performance. On average, we improve 1.66x over OpenMP default, 1.34x over an online scheme, 1.25x over an offline policy and 1.2x over a state-of-art analytic model. 
Determining the right number and type of experts is an open problem and our initial analysis shows that adding more experts improves accuracy and performance.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "PLDI '15 conference proceedings.", } @Article{Fabeiro:2015:AGO, author = "Jorge F. Fabeiro and Diego Andrade and Basilio B. Fraguela and Ram{\'o}n Doallo", title = "Automatic Generation of Optimized {OpenCL} Codes Using {OCLoptimizer}", journal = j-COMP-J, volume = "58", number = "11", pages = "3057--3073", month = nov, year = "2015", CODEN = "CMPJA6", DOI = "https://doi.org/10.1093/comjnl/bxv038", ISSN = "0010-4620 (print), 1460-2067 (electronic)", ISSN-L = "0010-4620", bibdate = "Tue Nov 17 08:06:33 MST 2015", bibsource = "http://comjnl.oxfordjournals.org/content/58/11.toc; https://www.math.utah.edu/pub/tex/bib/compj2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Computer Journal", journal-URL = "http://comjnl.oxfordjournals.org/", onlinedate = "June 2, 2015", } @Article{Fang:2015:EVD, author = "Jianbin Fang and Ana Lucia Varbanescu and Xiangke Liao and Henk Sips", title = "Evaluating vector data type usage in {OpenCL} kernels", journal = j-CCPE, volume = "27", number = "17", pages = "4586--4602", day = "10", month = dec, year = "2015", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.3424", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Tue Feb 9 06:13:20 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "23 Oct 2014", } @Article{Ferretti:2015:MCH, author = "Marco Ferretti and Mirto Musci and Luigi Santangelo", title = "{MPI--CMS}: a hybrid 
parallel approach to geometrical motif search in proteins", journal = j-CCPE, volume = "27", number = "18", pages = "5500--5516", day = "25", month = dec, year = "2015", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.3588", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Tue Feb 9 06:13:20 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "12 Aug 2015", } @Article{Filipovic:2015:OCC, author = "Jir{\'\i} Filipovic and Mat{\'u}s Madzin and Jan Fousek and Ludek Matyska", title = "Optimizing {CUDA} code by kernel fusion: application on {BLAS}", journal = j-J-SUPERCOMPUTING, volume = "71", number = "10", pages = "3934--3957", month = oct, year = "2015", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-015-1483-z", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Tue Sep 29 10:07:24 MDT 2015", bibsource = "http://link.springer.com/journal/11227/71/10; https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/article/10.1007/s11227-015-1483-z", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Article{Galizia:2015:MCL, author = "Antonella Galizia and Daniele D'Agostino and Andrea Clematis", title = "An {MPI--CUDA} library for image processing on {HPC} architectures", journal = j-J-COMPUT-APPL-MATH, volume = "273", number = "??", pages = "414--427", day = "1", month = jan, year = "2015", CODEN = "JCAMDI", ISSN = "0377-0427 (print), 1879-1778 (electronic)", ISSN-L = "0377-0427", bibdate = "Sat Feb 25 13:34:46 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/jcomputapplmath2015.bib; 
https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0377042714002374", acknowledgement = ack-nhfb, fjournal = "Journal of Computational and Applied Mathematics", journal-URL = "http://www.sciencedirect.com/science/journal/03770427", } @Article{Garain:2015:CCF, author = "Sudip Garain and Dinshaw S. Balsara and John Reid", title = "Comparing {Coarray Fortran (CAF)} with {MPI} for several structured mesh {PDE} applications", journal = j-J-COMPUT-PHYS, volume = "297", number = "??", pages = "237--253", day = "15", month = sep, year = "2015", CODEN = "JCTPAH", ISSN = "0021-9991 (print), 1090-2716 (electronic)", ISSN-L = "0021-9991", bibdate = "Sat Jul 25 09:25:55 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/fortran3.bib; https://www.math.utah.edu/pub/tex/bib/jcomputphys2015.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S002199911500354X", acknowledgement = ack-nhfb, fjournal = "Journal of Computational Physics", journal-URL = "http://www.sciencedirect.com/science/journal/00219991/", } @Article{Gidra:2015:NGC, author = "Lokesh Gidra and Ga{\"e}l Thomas and Julien Sopena and Marc Shapiro and Nhan Nguyen", title = "{NumaGiC}: a Garbage Collector for Big Data on Big {NUMA} Machines", journal = j-COMP-ARCH-NEWS, volume = "43", number = "1", pages = "661--673", month = mar, year = "2015", CODEN = "CANED2", DOI = "https://doi.org/10.1145/2786763.2694361", ISSN = "0163-5964 (print), 1943-5851 (electronic)", ISSN-L = "0163-5964", bibdate = "Wed Jun 3 11:27:38 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigarch.bib", abstract = "On contemporary cache-coherent Non-Uniform Memory Access (ccNUMA) architectures, applications with a large memory footprint suffer from the cost of the garbage collector (GC), because, as the GC scans the reference graph, it makes many remote memory accesses, 
saturating the interconnect between memory nodes. We address this problem with NumaGiC, a GC with a mostly-distributed design. In order to maximise memory access locality during collection, a GC thread avoids accessing a different memory node, instead notifying a remote GC thread with a message; nonetheless, NumaGiC avoids the drawbacks of a pure distributed design, which tends to decrease parallelism. We compare NumaGiC with Parallel Scavenge and NAPS on two different ccNUMA architectures running on the Hotspot Java Virtual Machine of OpenJDK 7. On Spark and Neo4j, two industry-strength analytics applications, with heap sizes ranging from 160GB to 350GB, and on SPECjbb2013 and SPECjbb2005, NumaGiC improves overall performance by up to 45\% over NAPS (up to 94\% over Parallel Scavenge), and increases the performance of the collector itself by up to 3.6x over NAPS (up to 5.4x over Parallel Scavenge).", acknowledgement = ack-nhfb, fjournal = "ACM SIGARCH Computer Architecture News", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J89", remark = "ASPLOS'15 conference proceedings.", } @Article{Havran:2015:EBT, author = "Vlastimil Havran and Petr Egert", title = "Extensions to bidirectional texture function compression with multi-level vector quantization in {OpenCL}", journal = j-COMPUTERS-AND-GRAPHICS, volume = "48", number = "??", pages = "1--10", month = may, year = "2015", CODEN = "COGRD2", ISSN = "0097-8493 (print), 1873-7684 (electronic)", ISSN-L = "0097-8493", bibdate = "Fri Apr 24 17:46:30 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/compgraph.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0097849315000060", acknowledgement = ack-nhfb, fjournal = "Computers \& Graphics", journal-URL = "http://www.sciencedirect.com/science/journal/00978493/", } @Article{Hoefler:2015:RMA, author = "Torsten Hoefler and James Dinan and Rajeev Thakur and Brian Barrett and Pavan Balaji and William 
Gropp and Keith Underwood", title = "Remote Memory Access Programming in {MPI-3}", journal = j-TOPC, volume = "2", number = "2", pages = "9:1--9:??", month = jul, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2780584", ISSN = "2329-4949 (print), 2329-4957 (electronic)", ISSN-L = "2329-4949", bibdate = "Fri Aug 7 10:22:35 MDT 2015", bibsource = "http://topc.acm.org/; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/topc.bib", abstract = "The Message Passing Interface (MPI) 3.0 standard, introduced in September 2012, includes a significant update to the one-sided communication interface, also known as remote memory access (RMA). In particular, the interface has been extended to better support popular one-sided and global-address-space parallel programming models to provide better access to hardware performance features and enable new data-access modes. We present the new RMA interface and specify formal axiomatic models for data consistency and access semantics. Such models can help users reason about details of the semantics that are hard to extract from the English prose in the standard. 
It also fosters the development of tools and compilers, enabling them to automatically analyze, optimize, and debug RMA programs.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM Transactions on Parallel Computing", journal-URL = "http://dl.acm.org/citation.cfm?id=2632163", } @Article{Jaaskelainen:2015:PPP, author = "Pekka J{\"a}{\"a}skel{\"a}inen and Carlos {S{\'a}nchez de La Lama} and Erik Schnetter and Kalle Raiskila and Jarmo Takala and Heikki Berg", title = "{pocl}: A Performance-Portable {OpenCL} Implementation", journal = j-INT-J-PARALLEL-PROG, volume = "43", number = "5", pages = "752--785", month = oct, year = "2015", CODEN = "IJPPE5", DOI = "https://doi.org/10.1007/s10766-014-0320-y", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Sat Aug 8 12:34:17 MDT 2015", bibsource = "http://link.springer.com/journal/10766/43/5; https://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/article/10.1007/s10766-014-0320-y", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", } @Article{Jaeger:2015:FGD, author = "Julien Jaeger and Patrick Carribault and Marc P{\'e}rache", title = "Fine-grain data management directory for {OpenMP 4.0} and {OpenACC}", journal = j-CCPE, volume = "27", number = "6", pages = "1528--1539", day = "25", month = apr, year = "2015", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.3352", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Sat Jul 25 19:54:07 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "13 Aug 2014", } @Article{Jo:2015:ALM, 
author = "Gangwon Jo and Jeongho Nah and Jun Lee and Jungwon Kim and Jaejin Lee", title = "Accelerating {LINPACK} with {MPI-OpenCL} on Clusters of Multi-{GPU} Nodes", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "26", number = "7", pages = "1814--1825", month = jul, year = "2015", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/TPDS.2014.2321742", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Mon Aug 3 11:58:51 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.computer.org/csdl/trans/td/2015/07/06846313-abs.html", abstract-URL = "http://www.computer.org/csdl/trans/td/2015/07/06846313-abs.html", acknowledgement = ack-nhfb, journal-URL = "http://www.computer.org/tpds/archives.htm", } @Article{Kaliman:2015:SNU, author = "Ilya A. Kaliman and Lyudmila V. Slipchenko", title = "Software News and Updates: Hybrid {MPI\slash OpenMP} parallelization of the effective fragment potential method in the {{\tt libefp}} software library", journal = j-J-COMPUT-CHEM, volume = "36", number = "2", pages = "129--135", day = "15", month = jan, year = "2015", CODEN = "JCCHDD", DOI = "https://doi.org/10.1002/jcc.23772", ISSN = "0192-8651 (print), 1096-987X (electronic)", ISSN-L = "0192-8651", bibdate = "Fri Mar 6 15:50:38 MST 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/jcomputchem2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Journal of Computational Chemistry", journal-URL = "http://www.interscience.wiley.com/jpages/0192-8651", onlinedate = "13 Nov 2014", } @Article{Karami:2015:SPA, author = "Ali Karami and Farshad Khunjush and Seyyed Ali Mirsoleimani", title = "A statistical performance analyzer framework for {OpenCL} kernels on {Nvidia GPUs}", journal = j-J-SUPERCOMPUTING, volume = "71", number = "8", pages = "2900--2921", month = aug, year = "2015", CODEN = "JOSUED", DOI = 
"https://doi.org/10.1007/s11227-014-1338-z", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Sat Aug 8 12:23:12 MDT 2015", bibsource = "http://link.springer.com/journal/11227/71/8; https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/article/10.1007/s11227-014-1338-z", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Article{Kim:2015:OBU, author = "Jungwon Kim and Seyong Lee and Jeffrey S. Vetter", title = "An {OpenACC}-based unified programming model for multi-accelerator systems", journal = j-SIGPLAN, volume = "50", number = "8", pages = "257--258", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688531", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper proposes a novel SPMD programming model of OpenACC. Our model integrates the different granularities of parallelism from vector-level parallelism to node-level parallelism into a single, unified model based on OpenACC. It allows programmers to write programs for multiple accelerators using a uniform programming model whether they are in shared or distributed memory systems. 
We implement a prototype of our model and evaluate its performance with a GPU-based supercomputer using three benchmark applications.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "PPoPP '15 conference proceedings.", } @InProceedings{Klawonn:2015:HMO, author = "Axel Klawonn and Martin Lanser and Oliver Rheinbach and Holger Stengel and Gerhard Wellein", title = "Hybrid {MPI\slash OpenMP} Parallelization in {FETI--DP} Methods", crossref = "Mehl:2015:RTC", volume = "105", pages = "67--84", year = "2015", DOI = "https://doi.org/10.1007/978-3-319-22997-3_4", bibdate = "Sat Dec 12 10:22:10 MST 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncse.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/chapter/10.1007/978-3-319-22997-3_4/", acknowledgement = ack-nhfb, book-DOI = "https://doi.org/10.1007/978-3-319-22997-3", book-URL = "http://www.springerlink.com/content/978-3-319-22997-3", } @Article{Komura:2015:OPS, author = "Yukihiro Komura", title = "{OpenACC} programs of the {Swendsen--Wang} multi-cluster spin flip algorithm", journal = j-COMP-PHYS-COMM, volume = "197", number = "??", pages = "298--303", month = dec, year = "2015", CODEN = "CPHCBZ", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Wed Nov 11 06:05:22 MST 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465515003197", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655/", } @Article{Kouzinopoulos:2015:MSM, author = "Charalampos S. Kouzinopoulos and Panagiotis D. Michailidis and Konstantinos G. 
Margaritis", title = "Multiple String Matching on a {GPU} using {CUDAs}", journal = j-SCPE, volume = "16", number = "2", pages = "121--138", month = "????", year = "2015", CODEN = "????", ISSN = "1895-1767", ISSN-L = "1895-1767", bibdate = "Mon Jan 7 06:46:46 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/scpe.bib; https://www.math.utah.edu/pub/tex/bib/string-matching.bib", URL = "https://www.scpe.org/index.php/scpe/article/view/1085", acknowledgement = ack-nhfb, fjournal = "Scalable Computing: Practice and Experience", journal-URL = "http://www.scpe.org/", } @Article{Kovanen:2015:TAC, author = "Janne Kovanen and Tapani Sarjakoski", title = "Tilewise Accumulated Cost Surface Computation with Graphics Processing Units", journal = j-TSAS, volume = "1", number = "2", pages = "8:1--8:27", month = nov, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2803172", ISSN = "2374-0353 (print), 2374-0361 (electronic)", ISSN-L = "2374-0353", bibdate = "Thu Jun 15 14:51:01 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/tsas.bib", URL = "http://dl.acm.org/citation.cfm?id=2803172", abstract = "Accumulated cost surfaces are used in a variety of fields that employ spatial analysis. Several algorithms have been suggested in the past for solving them efficiently or with minimal errors. Meanwhile, a new wave on the technological frontier has brought about general-purpose computing on GPUs. In this article, we describe how accumulated cost surfaces can be solved with CUDA. To verify the performance of our solution, we performed an experimental comparison against implementations run on a CPU. 
Our results with realistic cost models indicate that the move to GPUs can engender a speed-up of an order of magnitude.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on Spatial Algorithms and Systems (TSAS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J1514", } @Article{Kramer:2015:SET, author = "Stephan C. Kramer and Johannes Hagemann", title = "{SciPAL}: Expression Templates and Composition Closure Objects for High Performance Computational Physics with {CUDA} and {OpenMP}", journal = j-TOPC, volume = "1", number = "2", pages = "15:1--15:??", month = jan, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2686886", ISSN = "2329-4949 (print), 2329-4957 (electronic)", ISSN-L = "2329-4949", bibdate = "Wed Feb 18 16:46:00 MST 2015", bibsource = "http://topc.acm.org/; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/topc.bib", abstract = "We present SciPAL (scientific parallel algorithms library), a C++-based, hardware-independent open-source library. Its core is a domain-specific embedded language for numerical linear algebra. The main fields of application are finite element simulations, coherent optics and the solution of inverse problems. Using SciPAL algorithms can be stated in a mathematically intuitive way in terms of matrix and vector operations. Existing algorithms can easily be adapted to GPU-based computing by proper template specialization. Our library is compatible with the finite element library deal.II and provides a port of deal.II's most frequently used linear algebra classes to CUDA (NVidia's extension of the programming languages C and C++ for programming their GPUs). SciPAL's operator-based API for BLAS operations particularly aims at simplifying the usage of NVidia's CUBLAS. For non-BLAS array arithmetic SciPAL's expression templates are able to generate CUDA kernels at compile time.
We demonstrate the benefits of SciPAL using the iterative principal component analysis as example which is the core algorithm for the spike-sorting problem in neuroscience.", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Transactions on Parallel Computing", journal-URL = "http://dl.acm.org/citation.cfm?id=2632163", } @Article{Laguna:2015:DPF, author = "Ignacio Laguna and Dong H. Ahn and Bronis R. de Supinski and Saurabh Bagchi and Todd Gamblin", title = "Diagnosis of Performance Faults in {LargeScale} {MPI} Applications via Probabilistic Progress-Dependence Inference", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "26", number = "5", pages = "1280--1289", month = may, year = "2015", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/TPDS.2014.2314100", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Thu Jun 4 19:34:11 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://csdl.computer.org/csdl/trans/td/2015/05/06803050-abs.html", abstract-URL = "http://csdl.computer.org/csdl/trans/td/2015/05/06803050-abs.html", acknowledgement = ack-nhfb, journal-URL = "http://www.computer.org/tpds/archives.htm", } @Article{Lashgar:2015:CSR, author = "Ahmad Lashgar and Ebad Salehi and Amirali Baniasadi", title = "A Case Study in Reverse Engineering {GPGPUs}: Outstanding Memory Handling Resources", journal = j-COMP-ARCH-NEWS, volume = "43", number = "4", pages = "15--21", month = sep, year = "2015", CODEN = "CANED2", DOI = "https://doi.org/10.1145/2927964.2927968", ISSN = "0163-5964 (print), 1943-5851 (electronic)", ISSN-L = "0163-5964", bibdate = "Fri Apr 22 17:03:53 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigarch.bib", abstract = "During recent years, GPU micro-architectures have changed dramatically, evolving into powerful many-core deep-multithreaded platforms for 
parallel workloads. While important micro-architectural modifications continue to appear in every new generation of these processors, unfortunately, little is known about the details of these innovative designs. One of the key questions in understanding GPUs is how they deal with outstanding memory misses. Our goal in this study is to find answers to this question. To this end, we develop a set of micro-benchmarks in CUDA to understand the outstanding memory requests handling resources. Particularly, we study two NVIDIA GPGPUs (Fermi and Kepler) and estimate their capability in handling outstanding memory requests. We show that Kepler can issue nearly 32X higher number of outstanding memory requests, compared to Fermi. We explain this enhancement by Kepler's architectural modifications in outstanding memory request handling resources.", acknowledgement = ack-nhfb, fjournal = "ACM SIGARCH Computer Architecture News", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J89", remark = "HEART '15 conference proceedings.", } @Article{Lee:2015:GCE, author = "J. Lee and D. H. Woo and H. Kim and M. 
Azimi", title = "{GREEN} Cache: Exploiting the Disciplined Memory Model of {OpenCL} on {GPUs}", journal = j-IEEE-TRANS-COMPUT, volume = "64", number = "11", pages = "3167--3180", month = nov, year = "2015", CODEN = "ITCOB4", DOI = "https://doi.org/10.1109/TC.2015.2395435", ISSN = "0018-9340 (print), 1557-9956 (electronic)", ISSN-L = "0018-9340", bibdate = "Tue Oct 13 06:51:52 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranscomput2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "IEEE Transactions on Computers", journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=12", } @Article{Lee:2015:OPE, author = "Joo Hwan Lee and Nimit Nigania and Hyesoon Kim and Kaushik Patel and Hyojong Kim", title = "{OpenCL} Performance Evaluation on Modern Multicore {CPUs}", journal = j-SCI-PROG, volume = "2015", number = "??", pages = "859491:1--859491:20", month = "????", year = "2015", CODEN = "SCIPEV", DOI = "https://doi.org/10.1155/2015/859491", ISSN = "1058-9244 (print), 1875-919X (electronic)", ISSN-L = "1058-9244", bibdate = "Tue Sep 20 07:53:44 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sciprogram.bib", URL = "https://www.hindawi.com/journals/sp/2015/859491/", acknowledgement = ack-nhfb, fjournal = "Scientific Programming", journal-URL = "https://www.hindawi.com/journals/sp/", journalabr = "Sci. 
Prog", } @Article{Li:2015:AMR, author = "Jiansen Li and Jianqi Sun and Ying Song and Jun Zhao", title = "Accelerating {MRI} reconstruction via three-dimensional dual-dictionary learning using {CUDA}", journal = j-J-SUPERCOMPUTING, volume = "71", number = "7", pages = "2381--2396", month = jul, year = "2015", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-015-1386-z", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Sat Aug 8 12:23:11 MDT 2015", bibsource = "http://link.springer.com/journal/11227/71/7; https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/article/10.1007/s11227-015-1386-z", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Article{Lidbury:2015:MCC, author = "Christopher Lidbury and Andrei Lascu and Nathan Chong and Alastair F. Donaldson", title = "Many-core compiler fuzzing", journal = j-SIGPLAN, volume = "50", number = "6", pages = "65--76", month = jun, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2813885.2737986", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:41 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We address the compiler correctness problem for many-core systems through novel applications of fuzz testing to OpenCL compilers. Focusing on two methods from prior work, random differential testing and testing via equivalence modulo inputs (EMI), we present several strategies for random generation of deterministic, communicating OpenCL kernels, and an injection mechanism that allows EMI testing to be applied to kernels that otherwise exhibit little or no dynamically-dead code. 
We use these methods to conduct a large, controlled testing campaign with respect to 21 OpenCL (device, compiler) configurations, covering a range of CPU, GPU, accelerator, FPGA and emulator implementations. Our study provides independent validation of claims in prior work related to the effectiveness of random differential testing and EMI testing, proposes novel methods for lifting these techniques to the many-core setting and reveals a significant number of OpenCL compiler bugs in commercial implementations.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "PLDI '15 conference proceedings.", } @Article{Lopez:2015:PBV, author = "Hugo A. L{\'o}pez and Eduardo R. B. Marques and Francisco Martins and Nicholas Ng and C{\'e}sar Santos and Vasco Thudichum Vasconcelos and Nobuko Yoshida", title = "Protocol-based verification of message-passing parallel programs", journal = j-SIGPLAN, volume = "50", number = "10", pages = "280--298", month = oct, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858965.2814302", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:43 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present ParTypes, a type-based methodology for the verification of Message Passing Interface (MPI) programs written in the C programming language. The aim is to statically verify programs against protocol specifications, enforcing properties such as fidelity and absence of deadlocks. We develop a protocol language based on a dependent type system for message-passing parallel programs, which includes various communication operators, such as point-to-point messages, broadcast, reduce, array scatter and gather. 
For the verification of a program against a given protocol, the protocol is first translated into a representation read by VCC, a software verifier for C. We successfully verified several MPI programs in a running time that is independent of the number of processes or other input parameters. This contrasts with alternative techniques, notably model checking and runtime verification, that suffer from the state-explosion problem or that otherwise depend on parameters to the program itself. We experimentally evaluated our approach against state-of-the-art tools for MPI to conclude that our approach offers a scalable solution.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "OOPSLA '15 conference proceedings.", } @Article{Lorentz:2015:AMS, author = "Istvan Lorentz and Razvan Andonie and Levente Fabry-Asztalos", title = "Accelerating Molecular Structure Determination Based on Inter-Atomic Distances Using {OpenCL}", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "26", number = "12", pages = "3250--3263", month = dec, year = "2015", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/TPDS.2014.2385712", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Tue Nov 17 06:28:07 MST 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://csdl.computer.org/csdl/trans/td/2015/12/06995963-abs.html", abstract-URL = "http://csdl.computer.org/csdl/trans/td/2015/12/06995963-abs.html", acknowledgement = ack-nhfb, journal-URL = "http://www.computer.org/tpds/archives.htm", } @Article{Lotfi:2015:AAC, author = "Atieh Lotfi and Abbas Rahimi and Luca Benini and Rajesh K. 
Gupta", title = "Aging-Aware Compilation for {GP-GPUs}", journal = j-TACO, volume = "12", number = "2", pages = "24:1--24:??", month = jul, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2778984", ISSN = "1544-3566 (print), 1544-3973 (electronic)", ISSN-L = "1544-3566", bibdate = "Fri Aug 7 09:46:00 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/taco.bib", abstract = "General-purpose graphic processing units (GP-GPUs) offer high computational throughput using thousands of integrated processing elements (PEs). These PEs are stressed during workload execution, and negative bias temperature instability (NBTI) adversely affects their reliability by introducing new delay-induced faults. However, the effect of these delay variations is not uniformly spread across the PEs: some are affected more --- hence less reliable --- than others. This variation causes significant reduction in the lifetime of GP-GPU parts. In this article, we address the problem of ``wear leveling'' across processing units to mitigate lifetime uncertainty in GP-GPUs. We propose innovations in the static compiled code that can improve healing in PEs and stream cores (SCs) based on their degradation status. PE healing is a fine-grained very long instruction word (VLIW) slot assignment scheme that balances the stress of instructions across the PEs within an SC. SC healing is a coarse-grained workload allocation scheme that distributes workload across SCs in GP-GPUs. Both schemes share a common property: they adaptively shift workload from less reliable units to more reliable units, either spatially or temporally. These software schemes are based on online calibration with NBTI monitoring that equalizes the expected lifetime of PEs and SCs by regenerating adaptive compiled codes to respond to the specific health state of the GP-GPUs. 
We evaluate the effectiveness of the proposed schemes for various OpenCL kernels from the AMD APP SDK on Evergreen and Southern Island GPU architectures. The aging-aware healthy kernels generated by the PE (or SC) healing scheme reduce NBTI-induced voltage threshold shift by 30\% (77\% in the case of SCs), with no (moderate) performance penalty compared to the naive kernels.", acknowledgement = ack-nhfb, articleno = "24", fjournal = "ACM Transactions on Architecture and Code Optimization (TACO)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J924", } @Article{Markidis:2015:OAN, author = "Stefano Markidis and Jing Gong and Michael Schliephake and Erwin Laure and Alistair Hart and David Henty and Katherine Heisey and Paul Fischer", title = "{OpenACC} acceleration of the {Nek5000} spectral element code", journal = j-IJHPCA, volume = "29", number = "3", pages = "311--319", month = aug, year = "2015", CODEN = "IHPCFL", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Tue Apr 4 14:51:30 MDT 2017", bibsource = "http://hpc.sagepub.com/; https://www.math.utah.edu/pub/tex/bib/ijsa.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "International Journal of High Performance Computing Applications", journal-URL = "http://hpc.sagepub.com/content/by/year", } @Article{Martin:2015:EPM, author = "Gonzalo Mart{\'\i}n and David E. 
Singh and Maria-Cristina Marinescu and Jes{\'u}s Carretero", title = "Enhancing the performance of malleable {MPI} applications by using performance-aware dynamic reconfiguration", journal = j-PARALLEL-COMPUTING, volume = "46", number = "??", pages = "60--77", month = jul, year = "2015", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Wed Jun 17 11:37:27 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819115000642", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191/", } @Article{Mehta:2015:MTP, author = "Kshitij Mehta and Edgar Gabriel", title = "Multi-Threaded Parallel {I/O} for {OpenMP} Applications", journal = j-INT-J-PARALLEL-PROG, volume = "43", number = "2", pages = "286--309", month = apr, year = "2015", CODEN = "IJPPE5", DOI = "https://doi.org/10.1007/s10766-014-0306-9", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Sat Aug 8 12:34:16 MDT 2015", bibsource = "http://link.springer.com/journal/10766/43/2; https://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/article/10.1007/s10766-014-0306-9", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", } @Article{Momeni:2015:EEO, author = "Amir Momeni and Hamed Tabkhi and Yash Ukidave and Gunar Schirner and David Kaeli", title = "Exploring the Efficiency of the {OpenCL} Pipe Semantic on an {FPGA}", journal = j-COMP-ARCH-NEWS, volume = "43", number = "4", pages = "52--57", month = sep, year = "2015", CODEN = "CANED2", DOI = "https://doi.org/10.1145/2927964.2927974", ISSN = "0163-5964 (print), 1943-5851 (electronic)", ISSN-L =
"0163-5964", bibdate = "Fri Apr 22 17:03:53 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigarch.bib", abstract = "This paper evaluates the potential benefits of leveraging the OpenCL Pipe semantic to accelerate FPGA-based applications. Our work focuses on streaming applications in the embedded vision processing domain. These applications are well-suited for concurrent kernel execution support and inter-kernel communication enabled by using OpenCL pipes. We analyze the impact of multiple design factors and application optimizations to improve the performance offered by OpenCL Pipes. The design tradeoffs considered include: the execution granularity across kernels, the rate and volume of data transfers, and the Pipe size. For our case study application of vision flow, we observe a 2.8X increase in throughput for tuned pipelined kernels, as compared to non-pipelined execution. In addition, we propose a novel mechanism to efficiently capture the behavior for 2-dimensional (2D) vision algorithms to benefit Pipe-based execution.", acknowledgement = ack-nhfb, fjournal = "ACM SIGARCH Computer Architecture News", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J89", remark = "HEART '15 conference proceedings.", } @Article{Muddukrishna:2015:LAT, author = "Ananya Muddukrishna and Peter A.
Jonsson and Mats Brorsson", title = "Locality-Aware Task Scheduling and Data Distribution for {OpenMP} Programs on {NUMA} Systems and Manycore Processors", journal = j-SCI-PROG, volume = "2015", number = "??", pages = "981759:1--981759:16", month = "????", year = "2015", CODEN = "SCIPEV", DOI = "https://doi.org/10.1155/2015/981759", ISSN = "1058-9244 (print), 1875-919X (electronic)", ISSN-L = "1058-9244", bibdate = "Tue Sep 20 07:53:44 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sciprogram.bib", URL = "https://www.hindawi.com/journals/sp/2015/981759/", acknowledgement = ack-nhfb, fjournal = "Scientific Programming", journal-URL = "https://www.hindawi.com/journals/sp/", journalabr = "Sci. Prog", } @Article{Muralidharan:2015:COP, author = "Saurav Muralidharan and Michael Garland and Bryan Catanzaro and Albert Sidelnik and Mary Hall", title = "A collection-oriented programming model for performance portability", journal = j-SIGPLAN, volume = "50", number = "8", pages = "263--264", month = aug, year = "2015", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2858788.2688537", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Tue Feb 16 12:01:42 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper describes Surge, a collection-oriented programming model that enables programmers to compose parallel computations using nested high-level data collections and operators. Surge exposes a code generation interface, decoupled from the core computation, that enables programmers and autotuners to easily generate multiple implementations of the same computation on various parallel architectures such as multi-core CPUs and GPUs. 
By decoupling computations from architecture-specific
                 implementation, programmers can target multiple
                 architectures more easily, and generate a search space
                 that facilitates optimization and customization for
                 specific architectures. We express in Surge four
                 real-world benchmarks from domains such as sparse
                 linear-algebra and machine learning and from the same
                 performance-portable specification, generate OpenMP and
                 CUDA C++ implementations. Surge generates efficient,
                 scalable code which achieves up to 1.32x speedup over
                 handcrafted, well-optimized CUDA code.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "PPoPP '15 conference proceedings.",
}

@Article{Obrecht:2015:PEO,
  author =       "Christian Obrecht and Bernard Tourancheau and
                 Fr{\'e}d{\'e}ric Kuznik",
  title =        "Performance Evaluation of an {OpenCL} Implementation
                 of the {Lattice Boltzmann Method} on the
                 {Intel Xeon Phi}",
  journal =      j-PARALLEL-PROCESS-LETT,
  volume =       "25",
  number =       "3",
  pages =        "1541001",
  month =        sep,
  year =         "2015",
  CODEN =        "PPLTEE",
  DOI =          "https://doi.org/10.1142/S0129626415410017",
  ISSN =         "0129-6264 (print), 1793-642X (electronic)",
  ISSN-L =       "0129-6264",
  bibdate =      "Tue May 29 09:05:25 MDT 2018",
  bibsource =    "http://ejournals.wspc.com.sg/ppl/;
                 https://www.math.utah.edu/pub/tex/bib/parallelprocesslett.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Processing Letters",
  journal-URL =  "http://www.worldscientific.com/loi/ppl",
}

@Article{Orr:2015:SUR,
  author =       "Marc S. Orr and Shuai Che and Ayse Yilmazer and
                 Bradford M. Beckmann and Mark D. Hill and David A.
                 Wood",
  title =        "Synchronization Using Remote-Scope Promotion",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "43",
  number =       "1",
  pages =        "73--86",
  month =        mar,
  year =         "2015",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2786763.2694350",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Wed Jun 3 11:27:38 MDT 2015",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Heterogeneous system architecture (HSA) and OpenCL
                 define scoped synchronization to facilitate low
                 overhead communication across a subset of threads.
                 Scoped synchronization works well for static sharing
                 patterns, where consumer threads are known a priori.
                 It works poorly for dynamic sharing patterns (e.g.,
                 work stealing) where programmers cannot use a faster
                 small scope due to the rare possibility that the work
                 is stolen by a thread in a distant slower scope. This
                 puts programmers in a conundrum: optimize the common
                 case by synchronizing at a faster small scope or use
                 work stealing at a slower large scope. In this paper,
                 we propose to extend scoped synchronization with
                 remote-scope promotion. This allows the most frequent
                 sharers to synchronize through a small scope.
                 Infrequent sharers synchronize by promoting that
                 remote small scope to a larger shared scope.
                 Synchronization using remote-scope promotion provides
                 performance robustness for dynamic workloads, where
                 the benefits provided by scoped synchronization and
                 work stealing are hard to anticipate. Compared to a
                 na{\"\i}ve baseline, static scoped synchronization
                 alone achieves a 1.07x speedup on average and dynamic
                 work stealing alone achieves a 1.18x speedup on
                 average. In contrast, synchronization using
                 remote-scope promotion achieves a robust 1.25x speedup
                 on average, across a diverse set of graph benchmarks
                 and inputs.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J89",
  remark =       "ASPLOS'15 conference proceedings.",
}

@Article{Owaida:2015:EDS,
  author =       "Muhsen Owaida and Gabriel Falcao and Joao Andrade and
                 Christos Antonopoulos and Nikolaos Bellas and Madhura
                 Purnaprajna and David Novo and Georgios Karakonstantis
                 and Andreas Burg and Paolo Ienne",
  title =        "Enhancing Design Space Exploration by Extending
                 {CPU\slash GPU} Specifications onto {FPGAs}",
  journal =      j-TECS,
  volume =       "14",
  number =       "2",
  pages =        "33:1--33:??",
  month =        mar,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2656207",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Mar 26 05:58:56 MDT 2015",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The design cycle for complex special-purpose computing
                 systems is extremely costly and time-consuming. It
                 involves a multiparametric design space exploration
                 for optimization, followed by design verification.
                 Designers of special purpose VLSI implementations
                 often need to explore parameters, such as optimal
                 bitwidth and data representation, through
                 time-consuming Monte Carlo simulations. A prominent
                 example of this simulation-based exploration process
                 is the design of decoders for error correcting
                 systems, such as the Low-Density Parity-Check (LDPC)
                 codes adopted by modern communication standards, which
                 involves thousands of Monte Carlo runs for each design
                 point. Currently, high-performance computing offers a
                 wide set of acceleration options that range from
                 multicore CPUs to Graphics Processing Units (GPUs) and
                 Field Programmable Gate Arrays (FPGAs).
The exploitation of diverse target architectures is typically
                 associated with developing multiple code versions,
                 often using distinct programming paradigms. In this
                 context, we evaluate the concept of retargeting a
                 single OpenCL program to multiple platforms, thereby
                 significantly reducing design time. A single
                 OpenCL-based parallel kernel is used without
                 modifications or code tuning on multicore CPUs, GPUs,
                 and FPGAs. We use SOpenCL (Silicon to OpenCL), a tool
                 that automatically converts OpenCL kernels to RTL in
                 order to introduce FPGAs as a potential platform to
                 efficiently execute simulations coded in OpenCL. We
                 use LDPC decoding simulations as a case study.
                 Experimental results were obtained by testing a
                 variety of regular and irregular LDPC codes that range
                 from short/medium (e.g., 8,000 bit) to long length
                 (e.g., 64,800 bit) DVB-S2 codes. We observe that,
                 depending on the design parameters to be simulated, on
                 the dimension and phase of the design, the GPU or FPGA
                 may suit different purposes more conveniently, thus
                 providing different acceleration factors over
                 conventional multicore CPUs.",
  acknowledgement = ack-nhfb,
  articleno =    "33",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J840",
}

@Article{Power:2015:GGH,
  author =       "Jason Power and Joel Hestness and Marc S. Orr and Mark
                 D. Hill and David A. Wood",
  title =        "{gem5-gpu}: A Heterogeneous {CPU--GPU} Simulator",
  journal =      j-IEEE-COMPUT-ARCHIT-LETT,
  volume =       "14",
  number =       "1",
  pages =        "34--36",
  month =        jan # "\slash " # jun,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1109/LCA.2014.2299539",
  ISSN =         "1556-6056 (print), 1556-6064 (electronic)",
  ISSN-L =       "1556-6056",
  bibdate =      "Thu Jun 20 17:18:18 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/ieeecomputarchitlett.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract =     "gem5-gpu is a new simulator that models tightly
                 integrated CPU-GPU systems. It builds on gem5, a
                 modular full-system CPU simulator, and GPGPU-Sim, a
                 detailed GPGPU simulator. gem5-gpu routes most memory
                 accesses through Ruby, which is a highly configurable
                 memory system in gem5. By doing this, it is able to
                 simulate many system configurations, ranging from a
                 system with coherent caches and a single virtual
                 address space across the CPU and GPU to a system that
                 maintains separate GPU and CPU physical address
                 spaces. gem5-gpu can run most unmodified CUDA 3.2
                 source code. Applications can launch non-blocking
                 kernels, allowing the CPU and GPU to execute
                 simultaneously. We present gem5-gpu's software
                 architecture and a brief performance validation. We
                 also discuss possible extensions to the simulator.
                 gem5-gpu is open source and available at
                 gem5-gpu.cs.wisc.edu.",
  acknowledgement = ack-nhfb,
  affiliation =  "Power, J (Reprint Author), Univ Wisconsin, Dept Comp
                 Sci, 1210 W Dayton St, Madison, WI 53706 USA. Power,
                 Jason; Hestness, Joel; Orr, Marc S.; Hill, Mark D.;
                 Wood, David A., Univ Wisconsin, Dept Comp Sci,
                 Madison, WI 53706 USA.",
  author-email = "powerjg@cs.wisc.edu hestness@cs.wisc.edu
                 morr@cs.wisc.edu markhill@cs.wisc.edu
                 david@cs.wisc.edu",
  da =           "2019-06-20",
  doc-delivery-number = "CL1QK",
  eissn =        "1556-6064",
  fjournal =     "IEEE Computer Architecture Letters",
  journal-iso =  "IEEE Comput. Archit. Lett.",
  journal-URL =  "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=10208",
  keywords =     "general-purpose graphics processors; heterogeneous
                 (hybrid) systems; Modeling techniques; simulators",
  number-of-cited-references = "9",
  research-areas = "Computer Science",
  times-cited =  "62",
  unique-id =    "Power:2015:GGH",
  web-of-science-categories = "Computer Science, Hardware \&
                 Architecture",
}

@Article{Reano:2015:IUE,
  author =       "Carlos Rea{\~n}o and Federico Silla and Adri{\'a}n
                 Castell{\'o} and Antonio J. Pe{\~n}a and Rafael Mayo
                 and Enrique S.
Quintana-Ort{\'\i} and Jos{\'e} Duato",
  title =        "Improving the user experience of the {rCUDA} remote
                 {GPU} virtualization framework",
  journal =      j-CCPE,
  volume =       "27",
  number =       "14",
  pages =        "3746--3770",
  day =          "25",
  month =        sep,
  year =         "2015",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.3409",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Mon Sep 28 09:32:54 MDT 2015",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
  onlinedate =   "10 Oct 2014",
}

@Article{Rico-Gallego:2015:ILM,
  author =       "Juan-Antonio Rico-Gallego and Juan-Carlos
                 D{\'\i}az-Mart{\'\i}n",
  title =        "{$ \tau $-Lop}: Modeling performance of shared memory
                 {MPI}",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "46",
  number =       "??",
  pages =        "14--31",
  month =        jul,
  year =         "2015",
  CODEN =        "PACOEJ",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Wed Jun 17 11:37:27 MDT 2015",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819115000447",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191/",
}

@Article{Rodriguez:2015:OPI,
  author =       "Marcos Rodr{\'\i}guez and Fernando Blesa and Roberto
                 Barrio",
  title =        "{OpenCL} parallel integration of ordinary differential
                 equations: Applications in computational dynamics",
  journal =      j-COMP-PHYS-COMM,
  volume =       "192",
  number =       "??",
  pages =        "228--236",
  month =        jul,
  year =         "2015",
  CODEN =        "CPHCBZ",
  ISSN =         "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L =       "0010-4655",
  bibdate =      "Tue Apr 21 11:56:04 MDT 2015",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0010465515000703",
  acknowledgement = ack-nhfb,
  fjournal =     "Computer Physics Communications",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00104655/",
}

@Article{Sack:2015:CAM,
  author =       "Paul Sack and William Gropp",
  title =        "Collective Algorithms for Multiported Torus Networks",
  journal =      j-TOPC,
  volume =       "1",
  number =       "2",
  pages =        "12:1--12:??",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2686882",
  ISSN =         "2329-4949 (print), 2329-4957 (electronic)",
  ISSN-L =       "2329-4949",
  bibdate =      "Wed Feb 18 16:46:00 MST 2015",
  bibsource =    "http://topc.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/topc.bib",
  abstract =     "Modern supercomputers with torus networks allow each
                 node to simultaneously pass messages on all of its
                 links. However, most collective algorithms are
                 designed to only use one link at a time. In this work,
                 we present novel multiported algorithms for the
                 scatter, gather, all-gather, and reduce-scatter
                 operations. Our algorithms can be combined to create
                 multiported reduce, all-reduce, and broadcast
                 algorithms. Several of these algorithms involve a new
                 technique where we relax the MPI message-ordering
                 constraints to achieve high performance and restore
                 the correct ordering using an additional stage of
                 redundant communication. According to our models, on
                 an $n$-dimensional torus, our algorithms should allow
                 for nearly a $ 2 n$-fold improvement in communication
                 performance compared to known, single-ported torus
                 algorithms. In practice, we have achieved nearly
                 $ 6 \times $ better performance on a 32k-node
                 3-dimensional torus.",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Transactions on Parallel Computing",
  journal-URL =  "http://dl.acm.org/citation.cfm?id=2632163",
}

@Article{Saillard:2015:SDV,
  author =       "Emmanuelle Saillard and Patrick Carribault and Denis
                 Barthou",
  title =        "Static\slash dynamic validation of {MPI} collective
                 communications in multi-threaded context",
  journal =      j-SIGPLAN,
  volume =       "50",
  number =       "8",
  pages =        "279--280",
  month =        aug,
  year =         "2015",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2858788.2688548",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Tue Feb 16 12:01:42 MST 2016",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "Scientific applications mainly rely on the MPI
                 parallel programming model to reach high performance
                 on supercomputers. The advent of manycore
                 architectures (larger number of cores and lower amount
                 of memory per core) leads to mix MPI with a
                 thread-based model like OpenMP. But integrating two
                 different programming models inside the same
                 application can be tricky and generate complex bugs.
                 Thus, the correctness of hybrid programs requires a
                 special care regarding MPI calls location. For
                 example, identical MPI collective operations cannot be
                 performed by multiple non-synchronized threads. To
                 tackle this issue, this paper proposes a static
                 analysis and a reduced dynamic instrumentation to
                 detect bugs related to misuse of MPI collective
                 operations inside or outside threaded regions. This
                 work extends PARCOACH designed for MPI-only
                 applications and keeps the compatibility with these
                 algorithms.
We validated our method on multiple hybrid benchmarks and
                 applications with a low overhead.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "PPoPP '15 conference proceedings.",
}

@Article{Shterenlikht:2015:FC,
  author =       "Anton Shterenlikht and Lee Margetts and Luis Cebamanos
                 and David Henty",
  title =        "{Fortran 2008} coarrays",
  journal =      j-FORTRAN-FORUM,
  volume =       "34",
  number =       "1",
  pages =        "10--30",
  month =        apr,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2754942.2754944",
  ISSN =         "1061-7264 (print), 1931-1311 (electronic)",
  ISSN-L =       "1061-7264",
  bibdate =      "Mon Aug 10 06:22:12 MDT 2015",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/fortran-forum.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract =     "Coarrays are a Fortran 2008 standard feature intended
                 for SIMD type parallel programming. The runtime
                 environment starts a number of identical executable
                 images of the coarray program, on multiple processors,
                 which could be actual physical processors or threads.
                 Each image has a unique number and its private address
                 space. Ordinary variables are private to an image.
                 Coarray variables are available for read/write access
                 from any other image. Coarray communications are of
                 ``single sided'' type, i.e. a remote call from image A
                 to image B does not need to be accompanied by a
                 corresponding call in image B. This feature makes
                 coarray programming a lot simpler than MPI. The
                 standard provides synchronisation intrinsics to help
                 avoid race conditions or deadlocks. Any ordinary
                 variable can be made into a coarray --- scalars,
                 arrays, intrinsic or derived data types, pointers,
                 allocatables are all allowed. Coarrays can be declared
                 in, and passed to, procedures. Coarrays are thus very
                 flexible and can be used for a number of purposes. For
                 example a collection of coarrays from all or some
                 images can be thought of as a large single array. This
                 is precisely the inverse of the model partitioning
                 logic, typical in MPI programs. A coarray program can
                 exploit functional parallelism too, by delegating
                 distinct tasks to separate images or teams of images.
                 Coarray collectives are expected to become a part of
                 the next version of the Fortran standard. A major
                 unresolved problem of coarray programming is the lack
                 of standard parallel I/O facility in Fortran. In this
                 paper several simple complete coarray programs are
                 shown and compared to alternative parallel
                 technologies --- OpenMP, MPI and Fortran 2008
                 intrinsic ``do concurrent''. Inter-image communication
                 patterns and data transfer are illustrated. An example
                 of a materials microstructure simulation coarray
                 program scaled up to 32k cores is shown. Problems with
                 coarray I/O at this scale are highlighted and
                 addressed with the use of MPI-I/O. A hybrid
                 MPI/coarray programming is discussed and illustrated
                 with a finite element/cellular automata (CAF{\'E})
                 multi-scale model. The paper completes with a
                 description of the new coarray language features,
                 expected in the 2015 Fortran standard, and with a
                 brief list of coarray resources.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Fortran Forum",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J286",
}

@Article{Sosonkina:2015:RAV,
  author =       "Masha Sosonkina and Layne T.
Watson and Jian He",
  title =        "Remark on Algorithm 897: {VTDIRECT95}: Serial and
                 Parallel Codes for the Global Optimization Algorithm
                 {DIRECT}",
  journal =      j-TOMS,
  volume =       "41",
  number =       "3",
  pages =        "22:1--22:2",
  month =        jun,
  year =         "2015",
  CODEN =        "ACMSCU",
  DOI =          "https://doi.org/10.1145/2699459",
  ISSN =         "0098-3500 (print), 1557-7295 (electronic)",
  ISSN-L =       "0098-3500",
  bibdate =      "Wed Jun 3 17:59:32 MDT 2015",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/fortran3.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/toms.bib",
  note =         "See \cite{He:2009:AVS}.",
  abstract =     "The Fortran95 code VTDIRECT95, based on the original
                 MPI, has been modified to use MPI-2. An option for
                 VTDIRECT95 is to divide the feasible box into
                 subdomains, and concurrently apply the global direct
                 search algorithm DIRECT within each subdomain. When
                 the number of subdomains is greater than one, a bug
                 causes VTDIRECT95 to occasionally sample outside the
                 given feasible box, which is serious if the objective
                 function is not defined outside the given box. This
                 bug has been fixed, and the sample output files have
                 been updated to reflect the correction. For
                 completeness, the package VTDIRECT95 now contains both
                 the MPI-1 (with the multiple subdomain bug fixed) and
                 the MPI-2 versions of the code.",
  acknowledgement = ack-nhfb,
  articleno =    "22",
  fjournal =     "ACM Transactions on Mathematical Software (TOMS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J782",
}

@Article{Spencer:2015:DLN,
  author =       "Matt Spencer and Jesse Eickholt and Jianlin Cheng",
  title =        "A deep learning network approach to ab initio protein
                 secondary structure prediction",
  journal =      j-TCBB,
  volume =       "12",
  number =       "1",
  pages =        "103--112",
  month =        jan,
  year =         "2015",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2014.2343960",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Fri Aug 28 05:40:09 MDT 2015",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Ab initio protein secondary structure (SS) predictions
                 are utilized to generate tertiary structure
                 predictions, which are increasingly demanded due to
                 the rapid discovery of proteins. Although recent
                 developments have slightly exceeded previous methods
                 of SS prediction, accuracy has stagnated around 80
                 percent and many wonder if prediction cannot be
                 advanced beyond this ceiling. Disciplines that have
                 traditionally employed neural networks are
                 experimenting with novel deep learning techniques in
                 attempts to stimulate progress. Since neural networks
                 have historically played an important role in SS
                 prediction, we wanted to determine whether deep
                 learning could contribute to the advancement of this
                 field as well. We developed an SS predictor that makes
                 use of the position-specific scoring matrix generated
                 by PSI-BLAST and deep learning network architectures,
                 which we call DNSS. Graphical processing units and
                 CUDA software optimize the deep network architecture
                 and efficiently train the deep networks. Optimal
                 parameters for the training process were determined,
                 and a workflow comprising three separately trained
                 deep networks was constructed in order to make refined
                 predictions. This deep learning network approach was
                 used to predict SS for a fully independent test
                 dataset of 198 proteins, achieving a Q3 accuracy of
                 80.7 percent and a Sov accuracy of 74.2 percent.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Spiechowicz:2015:GAM,
  author =       "J. Spiechowicz and M. Kostur and L. Machura",
  title =        "{GPU} accelerated {Monte Carlo} simulation of
                 {Brownian} motors dynamics with {CUDA}",
  journal =      j-COMP-PHYS-COMM,
  volume =       "191",
  number =       "??",
  pages =        "140--149",
  month =        jun,
  year =         "2015",
  CODEN =        "CPHCBZ",
  ISSN =         "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L =       "0010-4655",
  bibdate =      "Fri Apr 24 18:44:55 MDT 2015",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0010465515000417",
  acknowledgement = ack-nhfb,
  fjournal =     "Computer Physics Communications",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00104655/",
}

@Article{Steuwer:2015:GPP,
  author =       "Michel Steuwer and Christian Fensch and Sam Lindley
                 and Christophe Dubach",
  title =        "Generating performance portable code using rewrite
                 rules: from high-level functional expressions to
                 high-performance {OpenCL} code",
  journal =      j-SIGPLAN,
  volume =       "50",
  number =       "9",
  pages =        "205--217",
  month =        sep,
  year =         "2015",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2858949.2784754",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Tue Feb 16 12:01:43 MST 2016",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "Computers have become increasingly complex with the
                 emergence of heterogeneous hardware combining
                 multicore CPUs and GPUs. These parallel systems
                 exhibit tremendous computational power at the cost of
                 increased programming effort resulting in a tension
                 between performance and code portability. Typically,
                 code is either tuned in a low-level imperative
                 language using hardware-specific optimizations to
                 achieve maximum performance or is written in a
                 high-level, possibly functional, language to achieve
                 portability at the expense of performance. We propose
                 a novel approach aiming to combine high-level
                 programming, code portability, and high-performance.
                 Starting from a high-level functional expression we
                 apply a simple set of rewrite rules to transform it
                 into a low-level functional representation, close to
                 the OpenCL programming model, from which OpenCL code
                 is generated. Our rewrite rules define a space of
                 possible implementations which we automatically
                 explore to generate hardware-specific OpenCL
                 implementations. We formalize our system with a core
                 dependently-typed lambda-calculus along with a
                 denotational semantics which we use to prove the
                 correctness of the rewrite rules. We test our design
                 in practice by implementing a compiler which generates
                 high performance imperative OpenCL code. Our
                 experiments show that we can automatically derive
                 hardware-specific implementations from simple
                 functional high-level algorithmic expressions offering
                 performance on a par with highly tuned code for
                 multicore CPUs and GPUs written by experts.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "ICFP '15 conference proceedings.",
}

@Article{Takizawa:2015:ODT,
  author =       "Hiroyuki Takizawa and Shoichi Hirasawa and Makoto
                 Sugawara and Isaac Gelado and Hiroaki Kobayashi and
                 Wen-mei W.
Hwu",
  title =        "Optimized Data Transfers Based on the {OpenCL} Event
                 Management Mechanism",
  journal =      j-SCI-PROG,
  volume =       "2015",
  number =       "??",
  pages =        "576498:1--576498:16",
  month =        "????",
  year =         "2015",
  CODEN =        "SCIPEV",
  DOI =          "https://doi.org/10.1155/2015/576498",
  ISSN =         "1058-9244 (print), 1875-919X (electronic)",
  ISSN-L =       "1058-9244",
  bibdate =      "Tue Sep 20 07:53:44 MDT 2016",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/sciprogram.bib",
  URL =          "https://www.hindawi.com/journals/sp/2015/576498/",
  acknowledgement = ack-nhfb,
  fjournal =     "Scientific Programming",
  journal-URL =  "https://www.hindawi.com/journals/sp/",
  journalabr =   "Sci. Prog",
}

@Article{Tennyson:2015:MOI,
  author =       "P. Gerald Tennyson and G. M. Karthik and G.
                 Phanikumar",
  title =        "{MPI + OpenCL} implementation of a phase-field method
                 incorporating {CALPHAD} description of {Gibbs}
                 energies on heterogeneous computing platforms",
  journal =      j-COMP-PHYS-COMM,
  volume =       "186",
  number =       "??",
  pages =        "48--64",
  month =        jan,
  year =         "2015",
  CODEN =        "CPHCBZ",
  DOI =          "https://doi.org/10.1016/j.cpc.2014.09.014",
  ISSN =         "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L =       "0010-4655",
  bibdate =      "Mon Nov 10 08:38:05 MST 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0010465514003208",
  acknowledgement = ack-nhfb,
  fjournal =     "Computer Physics Communications",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00104655/",
}

@Article{Thebault:2015:SEI,
  author =       "Lo{\"\i}c Th{\'e}bault and Eric Petit and Quang Dinh",
  title =        "Scalable and efficient implementation of {$3$D}
                 unstructured meshes computation: a case study on
                 matrix assembly",
  journal =      j-SIGPLAN,
  volume =       "50",
  number =       "8",
  pages =        "120--129",
  month =        aug,
  year =         "2015",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2858788.2688517",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Tue Feb 16 12:01:42 MST 2016",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "Exposing massive parallelism on 3D unstructured meshes
                 computation with efficient load balancing and minimal
                 synchronizations is challenging. Current approaches
                 relying on domain decomposition and mesh coloring
                 struggle to scale with the increasing number of cores
                 per nodes, especially with new many-core processors.
                 In this paper, we propose an hybrid approach using
                 domain decomposition to exploit distributed memory
                 parallelism, Divide-and-Conquer, D{\&}C, to exploit
                 shared memory parallelism and improve locality, and
                 mesh coloring at core level to exploit vectors. It
                 illustrates a new trade-off for many-cores between
                 structuredness, memory locality, and vectorization. We
                 evaluate our approach on the finite element matrix
                 assembly of an industrial fluid dynamic code developed
                 by Dassault Aviation. We compare our D{\&}C approach
                 to domain decomposition and to mesh coloring. D{\&}C
                 achieves a high parallel efficiency, a good data
                 locality as well as an improved bandwidth usage. It
                 competes on current nodes with the optimized pure MPI
                 version with a minimum 10\% speed-up. D{\&}C shows an
                 impressive 319x strong scaling on 512 cores (32 nodes)
                 with only 2000 vertices per core. Finally, the Intel
                 Xeon Phi version has a performance similar to 10 Intel
                 E5-2665 Xeon Sandy Bridge cores and 95\% parallel
                 efficiency on the 60 physical cores. Running on 4 Xeon
                 Phi (240 cores), D{\&}C has 92\% efficiency on the
                 physical cores and performance similar to 33 Intel
                 E5-2665 Xeon Sandy Bridge cores.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "PPoPP '15 conference proceedings.",
}

@Article{Thompson:2015:PCI,
  author =       "Elizabeth Thompson and Nathan Clem and David A.
                 Peter",
  title =        "Parallel {CUDA} implementation of conflict detection
                 for application to airspace deconfliction",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "71",
  number =       "10",
  pages =        "3787--3810",
  month =        oct,
  year =         "2015",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-015-1467-z",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Tue Sep 29 10:07:24 MDT 2015",
  bibsource =    "http://link.springer.com/journal/11227/71/10;
                 https://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/article/10.1007/s11227-015-1467-z",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
}

@Article{Vapirev:2015:IRC,
  author =       "A. Vapirev and J. Deca and G. Lapenta and S. Markidis
                 and I. Hur and J.-L.
Cambier",
  title =        "Initial results on computational performance of
                 {Intel} many integrated core, {Sandy Bridge}, and
                 graphical processing unit architectures:
                 implementation of a {$1$D C++\slash OpenMP}
                 electrostatic particle-in-cell code",
  journal =      j-CCPE,
  volume =       "27",
  number =       "3",
  pages =        "581--593",
  day =          "10",
  month =        mar,
  year =         "2015",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.3248",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Sat Jul 25 19:54:06 MDT 2015",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
  onlinedate =   "6 Mar 2014",
}

@Article{Verschelde:2015:PHC,
  author =       "Jan Verschelde and Xiangcheng Yu",
  title =        "Polynomial homotopy continuation on {GPUs}",
  journal =      j-ACM-COMM-COMP-ALGEBRA,
  volume =       "49",
  number =       "4",
  pages =        "130--133",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2893803.2893810",
  ISSN =         "1932-2232 (print), 1932-2240 (electronic)",
  ISSN-L =       "1932-2232",
  bibdate =      "Wed Feb 17 16:05:57 MST 2016",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/python.bib;
                 https://www.math.utah.edu/pub/tex/bib/sigsam.bib",
  abstract =     "The purpose of the software presentation is to
                 announce a library to track many solution paths
                 defined by a polynomial homotopy on a Graphics
                 Processing Unit (GPU). Developed on NVIDIA graphics
                 cards with CUDA SDKs, our code is released under the
                 GNU GPL license. Via the C interface to PHCpack, we
                 can call our GPU library from Python.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Communications in Computer Algebra",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1000",
}

@Article{Wang:2015:AST,
  author =       "Chun-Kun Wang and Peng-Sheng Chen",
  title =        "Automatic scoping of task clauses for the {OpenMP}
                 tasking model",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "71",
  number =       "3",
  pages =        "808--823",
  month =        mar,
  year =         "2015",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-014-1326-3",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Sat Aug 8 12:23:09 MDT 2015",
  bibsource =    "http://link.springer.com/journal/11227/71/3;
                 https://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/article/10.1007/s11227-014-1326-3",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
}

@Article{Wickerson:2015:RSP,
  author =       "John Wickerson and Mark Batty and Bradford M. Beckmann
                 and Alastair F. Donaldson",
  title =        "Remote-scope promotion: clarified, rectified, and
                 verified",
  journal =      j-SIGPLAN,
  volume =       "50",
  number =       "10",
  pages =        "731--747",
  month =        oct,
  year =         "2015",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2858965.2814283",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Tue Feb 16 12:01:43 MST 2016",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "Modern accelerator programming frameworks, such as
                 OpenCL, organise threads into work-groups.
                 Remote-scope promotion (RSP) is a language extension
                 recently proposed by AMD researchers that is designed
                 to enable applications, for the first time, both to
                 optimise for the common case of intra-work-group
                 communication (using memory scopes to provide
                 consistency only within a work-group) and to allow
                 occasional inter-work-group communication (as
                 required, for instance, to support the popular
                 load-balancing idiom of work stealing). We present the
                 first formal, axiomatic memory model of OpenCL
                 extended with RSP. We have extended the Herd memory
                 model simulator with support for OpenCL kernels that
                 exploit RSP, and used it to discover bugs in several
                 litmus tests and a work-stealing queue, that have been
                 used previously in the study of RSP. We have also
                 formalised the proposed GPU implementation of RSP. The
                 formalisation process allowed us to identify bugs in
                 the description of RSP that could result in
                 well-synchronised programs experiencing memory
                 inconsistencies. We present and prove sound a new
                 implementation of RSP that incorporates bug fixes and
                 requires less non-standard hardware than the original
                 implementation. This work, a collaboration between
                 academia and industry, clearly demonstrates how, when
                 designing hardware support for a new concurrent
                 language feature, the early application of formal
                 tools and techniques can help to prevent errors, such
                 as those we have found, from making it into silicon.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "OOPSLA '15 conference proceedings.",
}

@Article{Yazdanpanah:2015:PHR,
  author =       "Fahimeh Yazdanpanah and Carlos {\'A}lvarez and Daniel
                 Jim{\'e}nez-Gonz{\'a}lez and Rosa M.
Badia and Mateo Valero",
  title =        "{Picos}: a hardware runtime architecture support for
                 {OmpSs}",
  journal =      j-FUT-GEN-COMP-SYS,
  volume =       "53",
  number =       "??",
  pages =        "130--139",
  month =        dec,
  year =         "2015",
  CODEN =        "FGSEVI",
  DOI =          "https://doi.org/10.1016/j.future.2014.12.010",
  ISSN =         "0167-739X (print), 1872-7115 (electronic)",
  ISSN-L =       "0167-739X",
  bibdate =      "Wed Aug 12 13:56:06 MDT 2015",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/futgencompsys.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167739X14002702",
  acknowledgement = ack-nhfb,
  fjournal =     "Future Generation Computer Systems",
  journal-URL =  "http://www.sciencedirect.com/science/journal/0167739X/",
  keywords =     "OpenMP",
}

@Article{You:2015:VFO,
  author =       "Yi-Ping You and Hen-Jung Wu and Yeh-Ning Tsai and
                 Yen-Ting Chao",
  title =        "{VirtCL}: a framework for {OpenCL} device abstraction
                 and management",
  journal =      j-SIGPLAN,
  volume =       "50",
  number =       "8",
  pages =        "161--172",
  month =        aug,
  year =         "2015",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2858788.2688505",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Tue Feb 16 12:01:42 MST 2016",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  abstract =     "The interest in using multiple graphics processing
                 units (GPUs) to accelerate applications has increased
                 in recent years. However, the existing heterogeneous
                 programming models (e.g., OpenCL) abstract details of
                 GPU devices at the per-device level and require
                 programmers to explicitly schedule their kernel tasks
                 on a system equipped with multiple GPU devices.
                 Unfortunately, multiple applications running on a
                 multi-GPU system may compete for some of the GPU
                 devices while leaving other GPU devices unused.
                 Moreover, the distributed memory model defined in
                 OpenCL, where each device has its own memory space,
                 increases the complexity of managing the memory among
                 multiple GPU devices. In this article we propose a
                 framework (called VirtCL) that reduces the programming
                 burden by acting as a layer between the programmer and
                 the native OpenCL run-time system for abstracting
                 multiple devices into a single virtual device and for
                 scheduling computations and communications among the
                 multiple devices. VirtCL comprises two main
                 components: (1) a front-end library, which exposes
                 primary OpenCL APIs and the virtual device, and (2) a
                 back-end run-time system (called CLDaemon) for
                 scheduling and dispatching kernel tasks based on a
                 history-based scheduler. The front-end library
                 forwards computation requests to the back-end
                 CLDaemon, which then schedules and dispatches the
                 requests. We also propose a history-based scheduler
                 that is able to schedule kernel tasks in a contention-
                 and communication-aware manner. Experiments
                 demonstrated that the VirtCL framework introduced a
                 small overhead (mean of 6\%) but outperformed the
                 native OpenCL run-time system for most benchmarks in
                 the Rodinia benchmark suite, which was due to the
                 abstraction layer eliminating the time-consuming
                 initialization of OpenCL contexts. We also evaluated
                 different scheduling policies in VirtCL with a
                 real-world application (clsurf) and various synthetic
                 workload traces. The results indicated that the VirtCL
                 framework provides scalability for multiple kernel
                 tasks running on multi-GPU systems.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "PPoPP '15 conference proceedings.",
}

@Article{Younge:2015:SHP,
  author =       "Andrew J. Younge and John Paul Walters and Stephen P.
                 Crago and Geoffrey C. Fox",
  title =        "Supporting High Performance Molecular Dynamics in
                 Virtualized Clusters using {IOMMU}, {SR-IOV}, and
                 {GPUDirect}",
  journal =      j-SIGPLAN,
  volume =       "50",
  number =       "7",
  pages =        "31--38",
  month =        jul,
  year =         "2015",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2817817.2731194",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Tue Feb 16 12:01:42 MST 2016",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  abstract =     "Cloud Infrastructure-as-a-Service paradigms have
                 recently shown their utility for a vast array of
                 computational problems, ranging from advanced web
                 service architectures to high throughput computing.
                 However, many scientific computing applications have
                 been slow to adapt to virtualized cloud frameworks.
                 This is due to performance impacts of virtualization
                 technologies, coupled with the lack of advanced
                 hardware support necessary for running many high
                 performance scientific applications at scale. By using
                 KVM virtual machines that leverage both Nvidia GPUs
                 and InfiniBand, we show that molecular dynamics
                 simulations with LAMMPS and HOOMD run at near-native
                 speeds. This experiment also illustrates how
                 virtualized environments can support the latest
                 parallel computing paradigms, including both MPI+CUDA
                 and new GPUDirect RDMA functionality. Specific
                 findings show initial promise in scaling of such
                 applications to larger production deployments
                 targeting large scale computational workloads.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "VEE '15 conference proceedings.",
}

@Article{Zarrabi:2015:GSA,
  author =       "Amirreza Zarrabi and Khairulmizam Samsudin and Ettikan
                 K.
Karuppiah", title = "Gravitational search algorithm using {CUDA}: a case study in high-performance metaheuristics", journal = j-J-SUPERCOMPUTING, volume = "71", number = "4", pages = "1277--1296", month = apr, year = "2015", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-014-1360-1", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Sat Aug 8 12:23:10 MDT 2015", bibsource = "http://link.springer.com/journal/11227/71/4; https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/article/10.1007/s11227-014-1360-1", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Article{Zhu:2015:PIM, author = "Xiangyuan Zhu and Kenli Li and Ahmad Salah and Lin Shi and Keqin Li", title = "Parallel implementation of {MAFFT} on {CUDA}-enabled graphics hardware", journal = j-TCBB, volume = "12", number = "1", pages = "205--218", month = jan, year = "2015", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2014.2351801", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 28 05:40:09 MDT 2015", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "Multiple sequence alignment (MSA) constitutes an extremely powerful tool for many biological applications including phylogenetic tree estimation, secondary structure prediction, and critical residue identification. However, aligning large biological sequences with popular tools such as MAFFT requires long runtimes on sequential architectures. Due to the ever increasing sizes of sequence databases, there is increasing demand to accelerate this task. 
In this paper, we demonstrate how graphic processing units (GPUs), powered by the compute unified device architecture (CUDA), can be used as an efficient computational platform to accelerate the MAFFT algorithm. To fully exploit the GPU's capabilities for accelerating MAFFT, we have optimized the sequence data organization to eliminate the bandwidth bottleneck of memory access, designed a memory allocation and reuse strategy to make full use of limited memory of GPUs, proposed a new modified-run-length encoding (MRLE) scheme to reduce memory consumption, and used high-performance shared memory to speed up I/O operations. Our implementation tested in three NVIDIA GPUs achieves speedup up to 11.28 on a Tesla K20m GPU compared to the sequential MAFFT 7.015.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Zhu:2015:PML, author = "Leqing Zhu and Yadong Zhou and Daxing Zhang and Dadong Wang and Huiyan Wang and Xun Wang", title = "Parallel multi-level {2D-DWT} on {CUDA GPUs} and its application in ring artifact removal", journal = j-CCPE, volume = "27", number = "17", pages = "5188--5202", day = "10", month = dec, year = "2015", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.3559", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Tue Feb 9 06:13:20 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "5 Jun 2015", } @Article{Abdelfattah:2016:KOL, author = "Ahmad Abdelfattah and David Keyes and Hatem Ltaief", title = "{KBLAS}: an Optimized Library for Dense Matrix-Vector Multiplication on {GPU} Accelerators", journal = j-TOMS, volume = "42", number = "3", pages 
= "18:1--18:31", month = may, year = "2016", CODEN = "ACMSCU", DOI = "https://doi.org/10.1145/2818311", ISSN = "0098-3500 (print), 1557-7295 (electronic)", ISSN-L = "0098-3500", bibdate = "Mon May 23 16:40:02 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/toms.bib", abstract = "KBLAS is an open-source, high-performance library that provides optimized kernels for a subset of Level 2 BLAS functionalities on CUDA-enabled GPUs. Since performance of dense matrix-vector multiplication is hindered by the overhead of memory accesses, a double-buffering optimization technique is employed to overlap data motion with computation. After identifying a proper set of tuning parameters, KBLAS efficiently runs on various GPU architectures while avoiding code rewriting and retaining compliance with the standard BLAS API. Another optimization technique allows ensuring coalesced memory access when dealing with submatrices, especially for high-level dense linear algebra algorithms. All KBLAS kernels have been leveraged to a multi-GPU environment, which requires the introduction of new APIs. Considering general matrices, KBLAS is very competitive with existing state-of-the-art kernels and provides a smoother performance across a wide range of matrix dimensions. Considering symmetric and Hermitian matrices, the KBLAS performance outperforms existing state-of-the-art implementations on all matrix sizes and achieves asymptotically up to 50\% and 60\% speedup against the best competitor on single GPU and multi-GPUs systems, respectively. Performance results also validate our performance model. 
A subset of KBLAS high-performance kernels have been integrated into NVIDIA's standard BLAS implementation (cuBLAS) for larger dissemination, starting from version 6.0.", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Transactions on Mathematical Software (TOMS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J782", }

%%% Aji:2016:MAA: fjournal supplied so that this entry carries the
%%% expanded journal name, as neighbouring entries do.
@Article{Aji:2016:MAA,
  author =       "Ashwin M. Aji and Lokendra S. Panwar and Feng Ji and
                 Karthik Murthy and Milind Chabbi and Pavan Balaji and
                 Keith R. Bisset and James Dinan and Wu-chun Feng and
                 John Mellor-Crummey and Xiaosong Ma and Rajeev
                 Thakur",
  title =        "{MPI-ACC}: Accelerator-Aware {MPI} for Scientific
                 Applications",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "27",
  number =       "5",
  pages =        "1401--1414",
  month =        may,
  year =         "2016",
  CODEN =        "ITDSEO",
  DOI =          "https://doi.org/10.1109/TPDS.2015.2446479",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Fri Apr 15 13:45:22 MDT 2016",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.computer.org/csdl/trans/td/2016/05/07127020-abs.html",
  abstract-URL = "http://www.computer.org/csdl/trans/td/2016/05/07127020-abs.html",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed
                 Systems",
  journal-URL =  "http://www.computer.org/tpds/archives.htm",
}

@Article{Aji:2016:MEA, author = "Ashwin M. Aji and Antonio J.
Pe{\~n}a and Pavan Balaji and Wu-chun Feng", title = "{MultiCL}: Enabling automatic scheduling for task-parallel workloads in {OpenCL}", journal = j-PARALLEL-COMPUTING, volume = "58", number = "??", pages = "37--55", month = oct, year = "2016", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Tue Sep 27 08:00:38 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819116300357", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191/", }

%%% Aldea:2016:OES: fjournal supplied so that this entry carries the
%%% expanded journal name, as neighbouring entries do.
@Article{Aldea:2016:OES,
  author =       "Sergio Aldea and Alvaro Estebanez and Diego R. Llanos
                 and Arturo Gonzalez-Escribano",
  title =        "An {OpenMP} Extension that Supports Thread-Level
                 Speculation",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "27",
  number =       "1",
  pages =        "78--91",
  month =        jan,
  year =         "2016",
  CODEN =        "ITDSEO",
  DOI =          "https://doi.org/10.1109/TPDS.2015.2393870",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Tue Dec 15 09:28:10 MST 2015",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.computer.org/csdl/trans/td/2016/01/07014262-abs.html",
  abstract-URL = "http://www.computer.org/csdl/trans/td/2016/01/07014262-abs.html",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed
                 Systems",
  journal-URL =  "http://www.computer.org/tpds/archives.htm",
}

@Article{AlQuraishi:2016:CBP, author = "Eman AlQuraishi and Eman AlDwaisan and Alaa AlSaqaa and Imtiaz Ahmad", title = "A {CUDA}-based parallel implementation of a test vectors encoding algorithm in compression-based scan designs", journal = j-INT-J-PAR-EMER-DIST-SYS, volume = "31", number = "3", pages = "280--293", year = "2016", CODEN = "????", DOI = "https://doi.org/10.1080/17445760.2015.1016516", ISSN = "1744-5760
(print), 1744-5779 (electronic)", ISSN-L = "1744-5760", bibdate = "Mon Sep 12 09:19:42 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/intjparemerdistsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; http://www.tandfonline.com/toc/gpaa20/31/3", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel, Emergent and Distributed Systems: IJPEDS", journal-URL = "http://www.tandfonline.com/loi/gpaa20", onlinedate = "05 Mar 2015", } @Article{Andion:2016:LAA, author = "Jos{\'e} M. Andi{\'o}n and Manuel Arenaz and Fran{\c{c}}ois Bodin and Gabriel Rodr{\'\i}guez and Juan Touri{\~n}o", title = "Locality-Aware Automatic Parallelization for {GPGPU} with {OpenHMPP} Directives", journal = j-INT-J-PARALLEL-PROG, volume = "44", number = "3", pages = "620--643", month = jun, year = "2016", CODEN = "IJPPE5", DOI = "https://doi.org/10.1007/s10766-015-0362-9", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Mon May 30 09:25:55 MDT 2016", bibsource = "http://link.springer.com/journal/10766/44/3; https://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/article/10.1007/s10766-015-0362-9", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", } @Article{Andujar:2016:OSF, author = "Francisco J. And{\'u}jar and Juan A. Villar and Francisco J. Alfaro and Jos{\'e} L. 
S{\'a}nchez and Jesus Escudero-Sahuquillo", title = "An open-source family of tools to reproduce {MPI}-based workloads in interconnection network simulators", journal = j-J-SUPERCOMPUTING, volume = "72", number = "12", pages = "4601--4628", month = dec, year = "2016", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-016-1757-0", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Sat Jun 24 10:31:30 MDT 2017", bibsource = "http://link.springer.com/journal/11227/72/12; https://www.math.utah.edu/pub/tex/bib/gnu.bib; https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Article{Aubrey-Jones:2016:SMI, author = "Tristan Aubrey-Jones and Bernd Fischer", title = "Synthesizing {MPI} Implementations from Functional Data-Parallel Programs", journal = j-INT-J-PARALLEL-PROG, volume = "44", number = "3", pages = "552--573", month = jun, year = "2016", CODEN = "IJPPE5", DOI = "https://doi.org/10.1007/s10766-015-0359-4", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Mon May 30 09:25:55 MDT 2016", bibsource = "http://link.springer.com/journal/10766/44/3; https://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/article/10.1007/s10766-015-0359-4", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", } @Article{Bader:2016:EMT, author = "David A. 
Bader", title = "Evolving {MPI+X} Toward Exascale", journal = j-COMPUTER, volume = "49", number = "8", pages = "10--10", month = aug, year = "2016", CODEN = "CPTRB4", ISSN = "0018-9162 (print), 1558-0814 (electronic)", ISSN-L = "0018-9162", bibdate = "Tue Aug 23 06:56:16 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/computer2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://csdl.computer.org/csdl/mags/co/2016/08/mco2016080010.html", abstract-URL = "http://csdl.computer.org/csdl/mags/co/2016/08/mco2016080010-abs.html", acknowledgement = ack-nhfb, fjournal = "Computer", journal-URL = "http://www.computer.org/portal/web/csdl/magazines/computer", }

%%% Bader:2016:EMT (above): fjournal supplied so that the entry carries
%%% the expanded journal name, as neighbouring entries do.

@Article{Batty:2016:OSA, author = "Mark Batty and Alastair F. Donaldson and John Wickerson", title = "Overhauling {SC} atomics in {C11} and {OpenCL}", journal = j-SIGPLAN, volume = "51", number = "1", pages = "634--648", month = jan, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2914770.2837637", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:57 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Despite the conceptual simplicity of sequential consistency (SC), the semantics of SC atomic operations and fences in the C11 and OpenCL memory models is subtle, leading to convoluted prose descriptions that translate to complex axiomatic formalisations. We conduct an overhaul of SC atomics in C11, reducing the associated axioms in both number and complexity. A consequence of our simplification is that the SC operations in an execution no longer need to be totally ordered. This relaxation enables, for the first time, efficient and exhaustive simulation of litmus tests that use SC atomics.
We extend our improved C11 model to obtain the first rigorous memory model formalisation for OpenCL (which extends C11 with support for heterogeneous many-core programming). In the OpenCL setting, we refine the SC axioms still further to give a sensible semantics to SC operations that employ a `memory scope' to restrict their visibility to specific threads. Our overhaul requires slight strengthenings of both the C11 and the OpenCL memory models, causing some behaviours to become disallowed. We argue that these strengthenings are natural, and that all of the formalised C11 and OpenCL compilation schemes of which we are aware (Power and x86 CPUs for C11, AMD GPUs for OpenCL) remain valid in our revised models. Using the HERD memory model simulator, we show that our overhaul leads to an exponential improvement in simulation time for C11 litmus tests compared with the original model, making \emph{exhaustive} simulation competitive, time-wise, with the \emph{non-exhaustive} CDSChecker tool.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "POPL '16 conference proceedings.", } @Article{Bolis:2016:APA, author = "A. Bolis and C. D. Cantwell and D. Moxey and D. Serson and S. J.
Sherwin", title = "An adaptable parallel algorithm for the direct numerical simulation of incompressible turbulent flows using a {Fourier} spectral\slash $hp$ element method and {MPI} virtual topologies", journal = j-COMP-PHYS-COMM, volume = "206", number = "??", pages = "17--25", month = sep, year = "2016", CODEN = "CPHCBZ", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Fri Jun 10 18:27:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S001046551630100X", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655/", } @Article{Calore:2016:PPA, author = "Enrico Calore and Alessandro Gabbana and Jiri Kraus and Sebastiano Fabio Schifano and Raffaele Tripiccione", title = "Performance and portability of accelerated lattice {Boltzmann} applications with {OpenACC}", journal = j-CCPE, volume = "28", number = "12", pages = "3485--3502", day = "25", month = aug, year = "2016", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.3862", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Tue Sep 13 08:30:12 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", } @Article{Chang:2016:APC, author = "Chih-Hung Chang and Chih-Wei Lu and Chao-Tung Yang and Tzu-Chieh Chang", title = "An approach of performance comparisons with {OpenMP} and {CUDA} parallel programming on multicore systems", journal = j-CCPE, volume = "28", number = "16", pages = "4230--4245", month = nov, year = "2016", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.3829", ISSN = "1532-0626 
(print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Thu Nov 17 07:11:02 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", } @Article{Chang:2016:DLD, author = "Li-Wen Chang and Hee-Seok Kim and Wen-mei W. Hwu", title = "{DySel}: Lightweight Dynamic Selection for Kernel-based Data-parallel Programming Model", journal = j-SIGPLAN, volume = "51", number = "4", pages = "667--680", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872373", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The rising pressure for simultaneously improving performance and reducing power is driving more diversity into all aspects of computing devices. An algorithm that is well-matched to the target hardware can run multiple times faster and more energy efficiently than one that is not. The problem is complicated by the fact that a program's input also affects the appropriate choice of algorithm. As a result, software developers have been faced with the challenge of determining the appropriate algorithm for each potential combination of target device and data. This paper presents DySel, a novel runtime system for automating such determination for kernel-based data parallel programming models such as OpenCL, CUDA, OpenACC, and C++AMP. These programming models cover many applications that demand high performance in mobile, cloud and high-performance computing. 
DySel systematically deploys candidate kernels on a small portion of the actual data to determine which achieves the best performance for the hardware-data combination. The test-deployment, referred to as micro-profiling, contributes to the final execution result and incurs less than 8\% of overhead in the worst observed case when compared to an oracle. We show four major use cases where DySel provides significantly more consistent performance without tedious effort from the developer.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "ASPLOS '16 conference proceedings.", } @Article{Cores:2016:ROM, author = "Iv{\'a}n Cores and M{\'o}nica Rodr{\'\i}guez and Patricia Gonz{\'a}lez and Mar{\'\i}a J. Mart{\'\i}n", title = "Reducing the overhead of an {MPI} application-level migration approach", journal = j-PARALLEL-COMPUTING, volume = "54", number = "??", pages = "72--82", month = may, year = "2016", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Wed May 4 17:36:47 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819116000429", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191/", } @Article{Creech:2016:TSS, author = "Timothy Creech and Rajeev Barua", title = "Transparently Space Sharing a Multicore Among Multiple Processes", journal = j-TOPC, volume = "3", number = "3", pages = "17:1--17:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/3001910", ISSN = "2329-4949 (print), 2329-4957 (electronic)", ISSN-L = "2329-4949", bibdate = "Mon Dec 26 17:40:41 MST 2016", bibsource = "http://topc.acm.org/; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; 
https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/topc.bib", abstract = "As hardware becomes increasingly parallel and the availability of scalable parallel software improves, the problem of managing multiple multithreaded applications (processes) becomes important. Malleable processes, which can vary the number of threads used as they run, enable sophisticated and flexible resource management. Although many existing applications parallelized for SMPs with parallel runtimes are in fact already malleable, deployed runtime environments provide no interface nor any strategy for intelligently allocating hardware threads or even preventing oversubscription. Prior research methods either depend on profiling applications ahead of time to make good decisions about allocations or do not account for process efficiency at all, leading to poor performance. None of these prior methods have been adapted widely in practice. This article presents the Scheduling and Allocation with Feedback (SCAF) system: a drop-in runtime solution that supports existing malleable applications in making intelligent allocation decisions based on observed efficiency without any changes to semantics, program modification, offline profiling, or even recompilation. Our existing implementation can control most unmodified OpenMP applications. Other malleable threading libraries can also easily be supported with small modifications without requiring application modification or recompilation. In this work, we present the SCAF daemon and a SCAF-aware port of the GNU OpenMP runtime. We present a new technique for estimating process efficiency purely at runtime using available hardware counters and demonstrate its effectiveness in aiding allocation decisions. We evaluated SCAF using NAS NPB parallel benchmarks on five commodity parallel platforms, enumerating architectural features and their effects on our scheme. 
We measured the benefit of SCAF in terms of sum of speedups improvement (a common metric for multiprogrammed environments) when running all benchmark pairs concurrently compared to equipartitioning---the best existing competing scheme in the literature. We found that SCAF improves on equipartitioning on four out of five machines, showing a mean improvement factor in sum of speedups of 1.04 to 1.11x for benchmark pairs, depending on the machine, and 1.09x on average. Since we are not aware of any widely available tool for equipartitioning, we also compare SCAF against multiprogramming using unmodified OpenMP, which is the only environment available to end users today. SCAF improves on the unmodified OpenMP runtimes for all five machines, with a mean improvement of 1.08 to 2.07x, depending on the machine, and 1.59x on average.", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Transactions on Parallel Computing", journal-URL = "http://dl.acm.org/citation.cfm?id=2632163", }

%%% Creel:2016:NJM: the empty month and ISSN values and the ??--??
%%% page range were replaced with the data for Computational
%%% Economics 48(3), October 2016, and a journal-URL was added.
%%% TODO: confirm pages and month against the SpringerLink record
%%% for the DOI below.
@Article{Creel:2016:NJM,
  author =       "Michael Creel",
  title =        "A Note on {Julia} and {MPI}, with Code Examples",
  journal =      j-COMP-ECONOMICS,
  volume =       "48",
  number =       "3",
  pages =        "535--546",
  month =        oct,
  year =         "2016",
  CODEN =        "CNOMEL",
  DOI =          "https://doi.org/10.1007/s10614-015-9516-5",
  ISSN =         "0927-7099 (print), 1572-9974 (electronic)",
  ISSN-L =       "0927-7099",
  bibdate =      "Fri Apr 9 07:54:52 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/julia.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/article/10.1007/s10614-015-9516-5",
  acknowledgement = ack-nhfb,
  fjournal =     "Computational Economics",
  journal-URL =  "http://link.springer.com/journal/10614",
}

@Book{Czech:2016:IPC, author = "Zbigniew J.
Czech", title = "Introduction to Parallel Computing", publisher = pub-CAMBRIDGE, address = pub-CAMBRIDGE:adr, pages = "xvii + 354", year = "2016", DOI = "https://doi.org/10.1017/9781316795835", ISBN = "1-107-17439-2 (hardcover), 1-316-79583-7 (e-book)", ISBN-13 = "978-1-107-17439-9 (hardcover), 978-1-316-79583-5 (e-book)", LCCN = "QA76.58 .C975 2016", bibdate = "Fri Mar 31 11:22:52 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/super.bib; z3950.loc.gov:7090/Voyager", abstract = "The constantly increasing demand for more computing power can seem impossible to keep up with. However, multicore processors capable of performing computations in parallel allow computers to tackle ever larger problems in a wide variety of applications. This book provides a comprehensive introduction to parallel computing, discussing theoretical issues such as the fundamentals of concurrent processes, models of parallel and distributed computing, and metrics for evaluating and comparing parallel algorithms, as well as practical issues, including methods of designing and implementing shared- and distributed-memory programs, and standards for parallel program implementation, in particular MPI and OpenMP interfaces. Each chapter presents the basics in one place followed by advanced topics, allowing novices and experienced practitioners to quickly find what they need. A glossary and more than 80 exercises with selected solutions aid comprehension. 
The book is recommended as a text for advanced undergraduate or graduate students and as a reference for practitioners.", acknowledgement = ack-nhfb, subject = "Parallel processing (Electronic computers)", tableofcontents = "Concurrent processes \\ Basic models of parallel computation \\ Elementary parallel algorithms \\ Designing parallel algorithms \\ Architectures of parallel computers \\ Message-passing programming \\ Shared-memory programming", } @Article{Dathathri:2016:CAL, author = "Roshan Dathathri and Ravi Teja Mullapudi and Uday Bondhugula", title = "Compiling Affine Loop Nests for a Dynamic Scheduling Runtime on Shared and Distributed Memory", journal = j-TOPC, volume = "3", number = "2", pages = "12:1--12:??", month = aug, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2948975", ISSN = "2329-4949 (print), 2329-4957 (electronic)", ISSN-L = "2329-4949", bibdate = "Fri Sep 23 15:24:52 MDT 2016", bibsource = "http://topc.acm.org/; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/topc.bib", abstract = "Current de-facto parallel programming models like OpenMP and MPI make it difficult to extract task-level dataflow parallelism as opposed to bulk-synchronous parallelism. Task parallel approaches that use point-to-point synchronization between dependent tasks in conjunction with dynamic scheduling dataflow runtimes are thus becoming attractive. Although good performance can be extracted for both shared and distributed memory using these approaches, there is little compiler support for them. In this article, we describe the design of compiler--runtime interaction to automatically extract coarse-grained dataflow parallelism in affine loop nests for both shared and distributed-memory architectures. We use techniques from the polyhedral compiler framework to extract tasks and generate components of the runtime that are used to dynamically schedule the generated tasks. 
The runtime includes a distributed decentralized scheduler that dynamically schedules tasks on a node. The schedulers on different nodes cooperate with each other through asynchronous point-to-point communication, and all of this is achieved by code automatically generated by the compiler. On a set of six representative affine loop nest benchmarks, while running on 32 nodes with 8 threads each, our compiler-assisted runtime yields a geometric mean speedup of $ 143.6 \times $ ($ 70.3 \times $ to $ 474.7 \times $) over the sequential version and a geometric mean speedup of $ 1.64 \times $ ($ 1.04 \times $ to $ 2.42 \times $) over the state-of-the-art automatic parallelization approach that uses bulk synchronization. We also compare our system with past work that addresses some of these challenges on shared memory, and an emerging runtime (Intel Concurrent Collections) that demands higher programmer input and effort in parallelizing. To the best of our knowledge, ours is also the first automatic scheme that allows for dynamic scheduling of affine loop nests on a cluster of multicores.", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Transactions on Parallel Computing", journal-URL = "http://dl.acm.org/citation.cfm?id=2632163", } @Article{Deniz:2016:MGM, author = "Etem Deniz and Alper Sen", title = "{MINIME-GPU}: Multicore Benchmark Synthesizer for {GPUs}", journal = j-TACO, volume = "12", number = "4", pages = "34:1--34:??", month = jan, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2818693", ISSN = "1544-3566 (print), 1544-3973 (electronic)", ISSN-L = "1544-3566", bibdate = "Tue Feb 16 15:36:38 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/taco.bib", abstract = "We introduce MINIME-GPU, a novel automated benchmark synthesis framework for graphics processing units (GPUs) that serves to speed up architectural simulation of modern GPU architectures. 
Our framework captures important characteristics of original GPU applications and generates synthetic GPU benchmarks using the Open Computing Language (OpenCL) library from those applications. To the best of our knowledge, this is the first time synthetic OpenCL benchmarks for GPUs are generated from existing applications. We use several characteristics, including instruction throughput, compute unit occupancy, and memory efficiency, to compare the similarity of original applications and their corresponding synthetic benchmarks. The experimental results show that our synthetic benchmark generation framework is capable of generating synthetic benchmarks that have similar characteristics with the original applications from which they are generated. On average, the similarity (accuracy) is 96\% and the speedup is $ 541 \times $. In addition, our synthetic benchmarks use the OpenCL library, which allows us to obtain portable human readable benchmarks as opposed to using assembly-level code, and they are faster and smaller than the original applications from which they are generated. 
We experimentally validated that our synthetic benchmarks preserve the characteristics of the original applications across different architectures.", acknowledgement = ack-nhfb, articleno = "34", fjournal = "ACM Transactions on Architecture and Code Optimization (TACO)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J924", } @Article{Dinan:2016:IEM, author = "James Dinan and Pavan Balaji and Darius Buntinas and David Goodell and William Gropp and Rajeev Thakur", title = "An implementation and evaluation of the {MPI 3.0} one-sided communication interface", journal = j-CCPE, volume = "28", number = "17", pages = "4385--4404", day = "10", month = dec, year = "2016", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.3758", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Thu Nov 17 07:11:03 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", } @Article{DiPietro:2016:CLD, author = "Roberto {Di Pietro} and Flavio Lombardi and Antonio Villani", title = "{CUDA} Leaks: a Detailed Hack for {CUDA} and a (Partial) Fix", journal = j-TECS, volume = "15", number = "1", pages = "15:1--15:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2801153", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Wed Jun 8 09:43:30 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Graphics processing units (GPUs) are increasingly common on desktops, servers, and embedded platforms. In this article, we report on new security issues related to CUDA, which is the most widespread platform for GPU computing. 
In particular, details and proofs-of-concept are provided about novel vulnerabilities to which CUDA architectures are subject. We show how such vulnerabilities can be exploited to cause severe information leakage. As a case study, we experimentally show how to exploit one of these vulnerabilities on a GPU implementation of the AES encryption algorithm. Finally, we also suggest software patches and alternative approaches to tackle the presented vulnerabilities.", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J840", } @Article{Eckert:2016:HAL, author = "C. H. J. Eckert and E. Zenker and M. Bussmann and D. Albach", title = "{HASEonGPU} --- an adaptive, load-balanced {MPI\slash GPU}-code for calculating the amplified spontaneous emission in high power laser media", journal = j-COMP-PHYS-COMM, volume = "207", number = "??", pages = "362--374", month = oct, year = "2016", CODEN = "CPHCBZ", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Tue Aug 30 18:08:51 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465516301436", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655/", } @Article{Fabeiro:2016:WPP, author = "Jorge F. Fabeiro and Diego Andrade and Basilio B. 
Fraguela", title = "Writing a performance-portable matrix multiplication", journal = j-PARALLEL-COMPUTING, volume = "52", number = "??", pages = "65--77", month = feb, year = "2016", CODEN = "PACOEJ", DOI = "https://doi.org/10.1016/j.parco.2015.12.005", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Fri Feb 12 18:56:20 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819115001611", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191/", keywords = "GPU; Heterogeneous Programming Library (HPL); Intel Xeon Phi; MPI; OpenCL; OpenMP", } @Article{Gong:2016:NPG, author = "Jing Gong and Stefano Markidis and Erwin Laure and Matthew Otten and Paul Fischer and Misun Min", title = "Nekbone performance on {GPUs} with {OpenACC} and {CUDA} {Fortran} implementations", journal = j-J-SUPERCOMPUTING, volume = "72", number = "11", pages = "4160--4180", month = nov, year = "2016", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-016-1744-5", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Sat Jun 24 10:31:30 MDT 2017", bibsource = "http://link.springer.com/journal/11227/72/11; https://www.math.utah.edu/pub/tex/bib/fortran3.bib; https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Article{Guang:2016:NMN, author = "Suo Guang", title = "{NR-MPI}: A Non-stop and Fault Resilient {MPI} Supporting Programmer Defined Data Backup and Restore for {E}-scale Super Computing Systems", journal = j-SUPERFRI, volume = "3", number = "1", pages = "4--21", month = "????", year = "2016", CODEN = "????", ISSN = "2409-6008 (print), 
2313-8734 (electronic)", bibdate = "Sat Nov 11 07:15:27 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/superfri.bib", URL = "http://superfri.org/superfri/article/view/89", acknowledgement = ack-nhfb, fjournal = "Supercomputing Frontiers and Innovations", journal-URL = "http://superfri.org/superfri/issue/archive", } @Article{Hamidouche:2016:CAO, author = "Khaled Hamidouche and Akshay Venkatesh and Ammar Ahmad Awan and Hari Subramoni and Ching-Hsiang Chu and Dhabaleswar K. Panda", title = "{CUDA}-Aware {OpenSHMEM}: Extensions and Designs for High Performance {OpenSHMEM} on {GPU} Clusters", journal = j-PARALLEL-COMPUTING, volume = "58", number = "??", pages = "27--36", month = oct, year = "2016", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Tue Sep 27 08:00:38 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819116300345", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191/", } @Article{Haque:2016:ACV, author = "Syed Arefinul Haque and Salekul Islam and Md. 
Jahidul Islam and Jean-Charles Gr{\'e}goire", title = "An architecture for client virtualization: a case study", journal = j-COMP-NET-AMSTERDAM, volume = "100", number = "??", pages = "75--89", day = "8", month = may, year = "2016", CODEN = "????", ISSN = "1389-1286 (print), 1872-7069 (electronic)", ISSN-L = "1389-1286", bibdate = "Thu May 12 08:55:09 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/compnetamsterdam2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", URL = "http://www.sciencedirect.com/science/article/pii/S1389128616300421", acknowledgement = ack-nhfb, fjournal = "Computer Networks (Amsterdam, Netherlands: 1999)", journal-URL = "http://www.sciencedirect.com/science/journal/13891286/", } @Article{Hariri:2016:PPA, author = "F. Hariri and T. M. Tran and A. Jocksch and E. Lanti and J. Progsch and P. Messmer and S. Brunner and C. Gheller and L. Villard", title = "A portable platform for accelerated {PIC} codes and its application to {GPUs} using {OpenACC}", journal = j-COMP-PHYS-COMM, volume = "207", number = "??", pages = "69--82", month = oct, year = "2016", CODEN = "CPHCBZ", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Tue Aug 30 18:08:51 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465516301242", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655/", } @Article{Hu:2016:CLG, author = "Liang Hu and Xilong Che and Si-Qing Zheng", title = "A Closer Look at {GPGPU}", journal = j-COMP-SURV, volume = "48", number = "4", pages = "60:1--60:??", month = may, year = "2016", CODEN = "CMSVAN", DOI = "https://doi.org/10.1145/2873053", ISSN = "0360-0300 (print), 1557-7341 (electronic)", ISSN-L = "0360-0300", 
bibdate = "Mon May 2 16:19:12 MDT 2016", bibsource = "http://www.acm.org/pubs/contents/journals/surveys/; https://www.math.utah.edu/pub/tex/bib/compsurv.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "The lack of detailed white box illustration leaves a gap in the field of GPGPU (General-Purpose Computing on the Graphic Processing Unit), thus hindering users and researchers from exploring hardware potential while improving application performance. This article bridges the gap by demystifying the micro-architecture and operating mechanism of GPGPU. We propose a descriptive model that addresses key issues of most concerns, including task organization, hardware structure, scheduling mechanism, execution mechanism, and memory access. We also validate the effectiveness of our model by interpreting the software/hardware cooperation of CUDA.", acknowledgement = ack-nhfb, articleno = "60", fjournal = "ACM Computing Surveys", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J204", } @Article{Hung:2016:EBP, author = "Che-Lun Hung and Chun-Yuan Lin and Chia-Shin Ou and Yuan-Hong Tseng and Po-Yen Hung and Ship-Peng Li and Chun-Ting Fu", title = "Efficient bit-parallel subcircuit extraction using {CUDA}", journal = j-CCPE, volume = "28", number = "16", pages = "4326--4338", month = nov, year = "2016", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.3732", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Thu Nov 17 07:11:02 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", } @Article{Hunold:2016:RMB, author = "Sascha Hunold and Alexandra Carpen-Amarie", title = "Reproducible {MPI} Benchmarking is Still Not as Easy as You Think", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "27", number 
= "12", pages = "3617--3630", month = dec, year = "2016", CODEN = "ITDSEO", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Wed Nov 16 18:43:09 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://www.computer.org/csdl/trans/td/2016/12/07426807-abs.html", acknowledgement = ack-nhfb, journal-URL = "http://www.computer.org/tpds/archives.htm", } @Article{Ibanez:2016:HMT, author = "Dan Ibanez and Ian Dunn and Mark S. Shephard", title = "Hybrid {MPI}-thread parallelization of adaptive mesh operations", journal = j-PARALLEL-COMPUTING, volume = "52", number = "??", pages = "133--143", month = feb, year = "2016", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Fri Feb 12 18:56:20 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819116000041", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191/", } @Article{Iida:2016:GET, author = "Yuki Iida and Yusuke Fujii and Takuya Azumi and Nobuhiko Nishio and Shinpei Kato", title = "{GPUrpc}: Exploring Transparent Access to Remote {GPUs}", journal = j-TECS, volume = "16", number = "1", pages = "17:1--17:??", month = nov, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2950056", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Nov 3 16:48:38 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Graphics processing units (GPUs) are increasingly used for high-performance computing. Programming frameworks for general-purpose computing on GPUs (GPGPU), such as CUDA and OpenCL, are also maturing. 
Driving this trend is the recent proliferation of mobile devices such as smartphones and wearable computers. These devices are increasingly incorporating computationally intensive applications that involve some form of environmental recognition such as augmented reality (AR) or voice recognition. However, devices with low computational power cannot satisfy such demanding computing requirements. The CPU load of these devices could be reduced by offloading computation onto GPUs on the cloud. This paper presents GPUrpc, a remote procedure call (RPC) extension to Gdev, which is a rich set of runtime libraries and device drivers for achieving first-class GPU resource management. GPUrpc allows developers to use CUDA for GPGPU development work. Existing research uses RPCs based on the CUDA application programming interfaces (APIs); hence, all CUDA APIs require communication. To reduce communication overhead, we use an RPC based on a low-level API than CUDA API and reduced API that does not require communication. Our evaluation conducted on Linux and NVIDIA GPUs shows that the basic performance of our prototype implementation is reliable in comparison with the existing method. Evaluation using the Rodinia benchmark suite designed for research in heterogeneous parallel computing showed that GPUrpc is effective for applications such as image processing and data mining. 
GPUrpc also can improve power consumption to approximately 1/6 that of CPU processing for performing $ 512 \times 512 $ matrix multiplication.", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J840", remark = "Special issue on VIPES, special issue on ICESS2015 and regular papers.", } @Article{Ilie:2016:AEC, author = "Silvana Ilie and Arne Storjohann", title = "Abstracts of the {2015 East Coast Computer Algebra Day}", journal = j-ACM-COMM-COMP-ALGEBRA, volume = "50", number = "1", pages = "35--39", month = mar, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2930964.2930969", ISSN = "1932-2232 (print), 1932-2240 (electronic)", ISSN-L = "1932-2232", bibdate = "Wed Apr 27 16:14:51 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigsam.bib", abstract = "In the past decade, the introduction of low-level heterogeneous programming models, in particular CUDA, has brought supercomputing to the level of the desktop computer. However, these models bring notable challenges, even to expert programmers. Indeed, fully exploiting the power of hardware accelerators with CUDA-like code often requires significant code optimization effort. While this development can certainly yield high performance, it is desirable for some programmers to avoid the explicit management of device initialization and data transfer between memory levels. To this end, high-level models for accelerator programming, like OpenMP and OpenACC, have become an important research direction. 
With these models, programmers only need to annotate their C/C++ code to indicate which code portion is to be executed on the device and how data maps between host and device.", acknowledgement = ack-nhfb, fjournal = "ACM Communications in Computer Algebra", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1000", } @Article{Islam:2016:EMT, author = "Tanzima Islam and Kathryn Mohror and Martin Schulz", title = "Exploring the {MPI} tool information interface: features and capabilities", journal = j-IJHPCA, volume = "30", number = "2", pages = "212--222", year = "2016", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342015600507", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Tue Apr 4 14:51:30 MDT 2017", bibsource = "http://hpc.sagepub.com/; https://www.math.utah.edu/pub/tex/bib/ijsa.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://journals.sagepub.com/doi/full/10.1177/1094342015600507", acknowledgement = ack-nhfb, fjournal = "International Journal of High Performance Computing Applications", journal-URL = "https://journals.sagepub.com/home/hpc", xxmonth = may, } @Article{Kannan:2016:HPP, author = "Ramakrishnan Kannan and Grey Ballard and Haesun Park", title = "A high-performance parallel algorithm for nonnegative matrix factorization", journal = j-SIGPLAN, volume = "51", number = "8", pages = "9:1--9:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851152", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Non-negative matrix factorization (NMF) is the problem of determining two non-negative low rank factors W and H, for the given input matrix A, such that $ A \approx W H $. 
NMF is a useful tool for many applications in different domains such as topic modeling in text mining, background separation in video analysis, and community detection in social networks. Despite its popularity in the data mining community, there is a lack of efficient distributed algorithms to solve the problem for big data sets. We propose a high-performance distributed-memory parallel algorithm that computes the factorization by iteratively solving alternating non-negative least squares (NLS) subproblems for W and H. It maintains the data and factor matrices in memory (distributed across processors), uses MPI for interprocessor communication, and, in the dense case, provably minimizes communication costs (under mild assumptions). As opposed to previous implementations, our algorithm is also flexible: (1) it performs well for both dense and sparse matrices, and (2) it allows the user to choose any one of the multiple algorithms for solving the updates to low rank factors W and H within the alternating iterations. 
We demonstrate the scalability of our algorithm and compare it with baseline implementations, showing significant performance improvements.", acknowledgement = ack-nhfb, articleno = "9", fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "PPoPP '16 conference proceedings.", } @Article{Kim:2016:DOF, author = "Junghyun Kim and Gangwon Jo and Jaehoon Jung and Jungwon Kim and Jaejin Lee", title = "A distributed {OpenCL} framework using redundant computation and data replication", journal = j-SIGPLAN, volume = "51", number = "6", pages = "553--569", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908094", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Applications written solely in OpenCL or CUDA cannot execute on a cluster as a whole. Most previous approaches that extend these programming models to clusters are based on a common idea: designating a centralized host node and coordinating the other nodes with the host for computation. However, the centralized host node is a serious performance bottleneck when the number of nodes is large. In this paper, we propose a scalable and distributed OpenCL framework called SnuCL-D for large-scale clusters. SnuCL-D's remote device virtualization provides an OpenCL application with an illusion that all compute devices in a cluster are confined in a single node. To reduce the amount of control-message and data communication between nodes, SnuCL-D replicates the OpenCL host program execution and data in each node. We also propose a new OpenCL host API function and a queueing optimization technique that significantly reduce the overhead incurred by the previous centralized approaches. 
To show the effectiveness of SnuCL-D, we evaluate SnuCL-D with a microbenchmark and eleven benchmark applications on a large-scale CPU cluster and a medium-scale GPU cluster.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "PLDI '16 conference proceedings.", } @Article{Kobayashi:2016:HSV, author = "Ryohei Kobayashi and Tomohiro Misono and Kenji Kise", title = "A High-speed {Verilog} {HDL} Simulation Method using a Lightweight Translator", journal = j-COMP-ARCH-NEWS, volume = "44", number = "4", pages = "26--31", month = sep, year = "2016", CODEN = "CANED2", DOI = "https://doi.org/10.1145/3039902.3039908", ISSN = "0163-5964 (print), 1943-5851 (electronic)", ISSN-L = "0163-5964", bibdate = "Thu Jan 12 18:43:44 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigarch.bib", abstract = "Designing with Hardware Description Languages (HDLs) is still the de facto standard way to develop FPGA-based custom computing systems, and RTL simulation is an important step in ensuring that the designed hardware behavior meets the design specification. In this paper, we propose a new high-speed Verilog HDL simulation method. It is based on two previously proposed techniques: ArchHDL and Pyverilog. ArchHDL is used as a simulation engine in the method because the RTL simulation provided by ArchHDL can be parallelized with OpenMP. We use Pyverilog to develop a code translator to convert Verilog HDL source code into ArchHDL code, and due to this, the translator can be realized and its implementation is lightweight. 
We compare the proposed method with Synopsys VCS, and the experimental results show that the RTL simulation behavior and speed are same as that of Synopsys VCS and up to 5.8x better respectively.", acknowledgement = ack-nhfb, fjournal = "ACM SIGARCH Computer Architecture News", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J89", remark = "HEART '16 conference proceedings.", } @Article{Koitka:2016:NGA, author = "Sven Koitka and Christoph M. Friedrich", title = "\pkg{nmfgpu4R}: {GPU}-Accelerated Computation of the Non-Negative Matrix Factorization {(NMF)} Using {CUDA} Capable Hardware", journal = j-R-JOURNAL, volume = "8", number = "2", pages = "382--392", month = dec, year = "2016", DOI = "https://doi.org/10.32614/rj-2016-053", ISSN = "2073-4859", ISSN-L = "2073-4859", bibdate = "Fri May 21 06:58:41 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/rjournal.bib", URL = "https://journal.r-project.org/archive/2016/RJ-2016-053", acknowledgement = ack-nhfb, fjournal = "The R Journal", journal-URL = "http://journal.r-project.org/", } @Article{Kolesnichenko:2016:CBG, author = "Alexey Kolesnichenko and Christopher M. Poskitt and Sebastian Nanz and Bertrand Meyer", title = "Contract-based general-purpose {GPU} programming", journal = j-SIGPLAN, volume = "51", number = "3", pages = "75--84", month = mar, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2936314.2814216", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:58 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Using GPUs as general-purpose processors has revolutionized parallel computing by offering, for a large and growing set of algorithms, massive data-parallelization on desktop machines. 
An obstacle to widespread adoption, however, is the difficulty of programming them and the low-level control of the hardware required to achieve good performance. This paper suggests a programming library, SafeGPU, that aims at striking a balance between programmer productivity and performance, by making GPU data-parallel operations accessible from within a classical object-oriented programming language. The solution is integrated with the design-by-contract approach, which increases confidence in functional program correctness by embedding executable program specifications into the program text. We show that our library leads to modular and maintainable code that is accessible to GPGPU non-experts, while providing performance that is comparable with hand-written CUDA code. Furthermore, runtime contract checking turns out to be feasible, as the contracts can be executed on the GPU.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "GPCE '15 conference proceedings.", } @Article{Kulkarni:2016:HAP, author = "Kedar Kulkarni and Shreeya Badhe and Geetanjali Gadre", title = "{HCA} aware Parallel Communication Library: A feasibility study for offloading {MPI} requirements", journal = j-SUPERFRI, volume = "3", number = "3", pages = "56--60", month = "????", year = "2016", CODEN = "????", ISSN = "2409-6008 (print), 2313-8734 (electronic)", bibdate = "Sat Nov 11 07:15:27 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/superfri.bib", URL = "http://superfri.org/superfri/article/view/109", acknowledgement = ack-nhfb, fjournal = "Supercomputing Frontiers and Innovations", journal-URL = "http://superfri.org/superfri/issue/archive", } @Article{Kutyniok:2016:SFD, author = "Gitta Kutyniok and Wang-Q Lim and Rafael Reisenhofer", title = "{ShearLab $3$D}: Faithful Digital Shearlet Transforms Based on Compactly Supported Shearlets", journal = 
j-TOMS, volume = "42", number = "1", pages = "5:1--5:42", month = feb, year = "2016", CODEN = "ACMSCU", DOI = "https://doi.org/10.1145/2740960", ISSN = "0098-3500 (print), 1557-7295 (electronic)", ISSN-L = "0098-3500", bibdate = "Tue Mar 1 17:07:56 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/toms.bib", abstract = "Wavelets and their associated transforms are highly efficient when approximating and analyzing one-dimensional signals. However, multivariate signals such as images or videos typically exhibit curvilinear singularities, which wavelets are provably deficient in sparsely approximating and also in analyzing in the sense of, for instance, detecting their direction. Shearlets are a directional representation system extending the wavelet framework, which overcomes those deficiencies. Similar to wavelets, shearlets allow a faithful implementation and fast associated transforms. In this article, we will introduce a comprehensive carefully documented software package coined ShearLab 3D (www.ShearLab.org) and discuss its algorithmic details. This package provides MATLAB code for a novel faithful algorithmic realization of the 2D and 3D shearlet transform (and their inverses) associated with compactly supported universal shearlet systems incorporating the option of using CUDA. We will present extensive numerical experiments in 2D and 3D concerning denoising, inpainting, and feature extraction, comparing the performance of ShearLab 3D with similar transform-based algorithms such as curvelets, contourlets, or surfacelets. In the spirit of reproducible research, all scripts are accessible on www.ShearLab.org.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on Mathematical Software (TOMS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J782", } @Article{Laguna:2016:EEU, author = "Ignacio Laguna and David F. Richards and Todd Gamblin and Martin Schulz and Bronis R. 
de Supinski and Kathryn Mohror and Howard Pritchard", title = "Evaluating and extending user-level fault tolerance in {MPI} applications", journal = j-IJHPCA, volume = "30", number = "3", pages = "305--319", year = "2016", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342015623623", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Tue Apr 4 14:51:30 MDT 2017", bibsource = "http://hpc.sagepub.com/; https://www.math.utah.edu/pub/tex/bib/ijsa.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://journals.sagepub.com/doi/full/10.1177/1094342015623623", acknowledgement = ack-nhfb, fjournal = "International Journal of High Performance Computing Applications", journal-URL = "https://journals.sagepub.com/home/hpc", xxmonth = aug, } @Article{Langr:2016:ASM, author = "Daniel Langr and Pavel Tvrdik and Ivan Simecek", title = "{AQsort}: Scalable Multi-Array In-Place Sorting with {OpenMP}", journal = j-SCPE, volume = "17", number = "4", pages = "369--391", month = "????", year = "2016", CODEN = "????", ISSN = "1895-1767", ISSN-L = "1895-1767", bibdate = "Mon Jan 7 06:46:48 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/scpe.bib", URL = "https://www.scpe.org/index.php/scpe/article/view/1207", acknowledgement = ack-nhfb, fjournal = "Scalable Computing: Practice and Experience", journal-URL = "http://www.scpe.org/", } @Article{Lashgar:2016:ESM, author = "Ahmad Lashgar and Amirali Baniasadi", title = "Employing Software-Managed Caches in {OpenACC}: Opportunities and Benefits", journal = j-TOMPECS, volume = "1", number = "1", pages = "2:1--2:34", month = mar, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2798724", ISSN = "2376-3639 (print), 2376-3647 (electronic)", ISSN-L = "2376-3639", bibdate = "Thu Jun 15 12:29:10 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/tompecs.bib", URL = 
"http://dl.acm.org/citation.cfm?id=2798724", abstract = "The OpenACC programming model has been developed to simplify accelerator programming and improve development productivity. In this article, we investigate the main limitations faced by OpenACC in harnessing all capabilities of GPU-like accelerators. We build on our findings and discuss the opportunity to exploit a software-managed cache as (i) a fast communication medium and (ii) a cache for data reuse. To this end, we propose a new directive and communication model for OpenACC. Investigating several benchmarks, we show that the proposed directive can improve performance up to $ 2.54 \times $, and at the cost of minor programming effort.", acknowledgement = ack-nhfb, articleno = "2", fjournal = "ACM Transactions on Modeling and Performance Evaluation of Computing Systems (TOMPECS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J1525", } @Article{Lin:2016:VDF, author = "Yu-Te Lin and Jenq-Kuen Lee", title = "Vector data flow analysis for {SIMD} optimizations on {OpenCL} programs", journal = j-CCPE, volume = "28", number = "5", pages = "1629--1654", day = "10", month = apr, year = "2016", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.3714", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Sun Apr 3 12:34:13 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "24 Oct 2015", } @Article{Liu:2016:MBM, author = "Weifeng Liu and Michael Gerndt and Bin Gong", title = "Model-based {MPI-IO} tuning with {Periscope} tuning framework", journal = j-CCPE, volume = "28", number = "1", pages = "3--20", month = jan, year = "2016", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.3603", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L 
= "1532-0626", bibdate = "Tue Feb 9 06:13:21 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "14 Aug 2015", } @Article{Lobeiras:2016:DEI, author = "Jacobo Lobeiras and Margarita Amor and Ramon Doallo", title = "Designing Efficient Index-Digit Algorithms for {CUDA} {GPU} Architectures", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "27", number = "5", pages = "1331--1343", month = may, year = "2016", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/TPDS.2015.2450718", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Fri Apr 15 13:45:22 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.computer.org/csdl/trans/td/2016/05/07138631-abs.html", abstract-URL = "http://www.computer.org/csdl/trans/td/2016/05/07138631-abs.html", acknowledgement = ack-nhfb, journal-URL = "http://www.computer.org/tpds/archives.htm", } @Article{Loncar:2016:CPS, author = "Vladimir Loncar and Antun Balaz and Aleksandar Bogojevi{\'c} and Srdjan Skrbi{\'c} and Paulsamy Muruganandam and Sadhan K. 
Adhikari", title = "{CUDA} programs for solving the time-dependent dipolar {Gross--Pitaevskii} equation in an anisotropic trap", journal = j-COMP-PHYS-COMM, volume = "200", number = "??", pages = "406--410", month = mar, year = "2016", CODEN = "CPHCBZ", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Thu Jan 21 15:04:34 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465515004361", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655/", } @Article{Loncar:2016:OOM, author = "Vladimir Loncar and Luis E. Young-S. and Srdjan Skrbi{\'c} and Paulsamy Muruganandam and Sadhan K. Adhikari and Antun Balaz", title = "{OpenMP}, {OpenMP\slash MPI}, and {CUDA\slash MPI} {C} programs for solving the time-dependent dipolar {Gross--Pitaevskii} equation", journal = j-COMP-PHYS-COMM, volume = "209", number = "??", pages = "190--196", month = dec, year = "2016", CODEN = "CPHCBZ", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Tue Oct 18 17:55:23 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465516302272", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655/", } @Article{Maleki:2016:HOT, author = "Sepideh Maleki and Annie Yang and Martin Burtscher", title = "Higher-order and tuple-based massively-parallel prefix sums", journal = j-SIGPLAN, volume = "51", number = "6", pages = "539--552", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908089", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 
(electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Prefix sums are an important parallel primitive, especially in massively-parallel programs. This paper discusses two orthogonal generalizations thereof, which we call higher-order and tuple-based prefix sums. Moreover, it describes and evaluates SAM, a GPU-friendly algorithm for computing prefix sums and other scans that directly supports higher orders and tuple values. Its templated CUDA implementation unifies all of these computations in a single 100-statement kernel. SAM is communication-efficient in the sense that it minimizes main-memory accesses. When computing prefix sums of a million or more values, it outperforms Thrust and CUDPP on both a Titan X and a K40 GPU. On the Titan X, SAM reaches memory-copy speeds for large input sizes, which cannot be surpassed. SAM outperforms CUB, the currently fastest conventional prefix sum implementation, by up to a factor of 2.9 on eighth-order prefix sums and by up to a factor of 2.6 on eight-tuple prefix sums.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "PLDI '16 conference proceedings.", } @Article{Mallon:2016:MUB, author = "Dami{\'a}n A. Mall{\'o}n and Guillermo L. 
Taboada and Lars Koesterke", title = "{MPI} and {UPC} broadcast, scatter and gather algorithms in {Xeon Phi}", journal = j-CCPE, volume = "28", number = "8", pages = "2322--2340", day = "10", month = jun, year = "2016", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.3552", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Wed Jun 8 06:47:20 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", } @Article{Manca:2016:CQI, author = "Emanuele Manca and Andrea Manconi and Alessandro Orro and Giuliano Armano and Luciano Milanesi", title = "{CUDA-quicksort}: an improved {GPU}-based implementation of quicksort", journal = j-CCPE, volume = "28", number = "1", pages = "21--43", month = jan, year = "2016", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.3611", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Tue Feb 9 06:13:21 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "12 Aug 2015", } @Article{Marendic:2016:NMR, author = "P. Marendic and J. Lemeire and D. Vucinic and P. 
Schelkens", title = "A novel {MPI} reduction algorithm resilient to imbalances in process arrival times", journal = j-J-SUPERCOMPUTING, volume = "72", number = "5", pages = "1973--2013", month = may, year = "2016", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-016-1707-x", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Mon May 30 09:17:38 MDT 2016", bibsource = "http://link.springer.com/journal/11227/72/5; https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/article/10.1007/s11227-016-1707-x", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Book{Matloff:2016:PCD, author = "Norman S. Matloff", title = "Parallel Computing for Data Science: with Examples in {R}, {C++} and {CUDA}", volume = "28", publisher = pub-CRC, address = pub-CRC:adr, pages = "xxiii + 324", year = "2016", ISBN = "1-4665-8701-6 (hardcover)", ISBN-13 = "978-1-4665-8701-4 (hardcover)", LCCN = "QA76.642 M37 2016", bibdate = "Sat Jun 27 09:13:41 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/s-plus.bib; z3950.loc.gov:7090/Voyager", series = "Chapman and Hall/CRC: The R Series", URL = "http://www.tandf.net/books/details/9781466587014", abstract = "\booktitle{Parallel Computing for Data Science: With Examples in R, C++ and CUDA} is one of the first parallel computing books to concentrate exclusively on parallel data structures, algorithms, software tools, and applications in data science. It includes examples not only from the classic ``$n$ observations, $p$ variables'' matrix format but also from time series, network graph models, and numerous other structures common in data science. The examples illustrate the range of issues encountered in parallel programming. 
With the main focus on computation, the book shows how to compute on three types of platforms.", acknowledgement = ack-nhfb, subject = "Parallel programming (Computer science); Electronic data processing; R (Computer program language)", tableofcontents = "Preface \\ Author's Biography \\ 1: Introduction to Parallel Processing in R \\ 2: ``Why Is My Program So Slow?'': Obstacles to Speed \\ 3: Principles of Parallel Loop Scheduling \\ 4: The Shared-Memory Paradigm: A Gentle Introduction via R \\ 5: The Shared-Memory Paradigm: C Level \\ 6: The Shared-Memory Paradigm: GPUs \\ 7: Thrust and Rth \\ 8: The Message Passing Paradigm \\ 9: MapReduce Computation \\ 10: Parallel Sorting and Merging \\ 11: Parallel Prefix Scan \\ 12: Parallel Matrix Operations \\ 13: Inherently Statistical Approaches: Subset Methods \\ Appendix A: Review of Matrix Algebra \\ Appendix B: R Quick Start \\ Appendix C: Introduction to C for R Programmers \\ Back Cover", } @Article{Muddukrishna:2016:GGO, author = "Ananya Muddukrishna and Peter A. Jonsson and Artur Podobas and Mats Brorsson", title = "Grain graphs: {OpenMP} performance analysis made easy", journal = j-SIGPLAN, volume = "51", number = "8", pages = "28:1--28:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851156", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Average programmers struggle to solve performance problems in OpenMP programs with tasks and parallel for-loops. Existing performance analysis tools visualize OpenMP task performance from the runtime system's perspective where task execution is interleaved with other tasks in an unpredictable order.
Problems with OpenMP parallel for-loops are similarly difficult to resolve since tools only visualize aggregate thread-level statistics such as load imbalance without zooming into a per-chunk granularity. The runtime system/threads oriented visualization provides poor support for understanding problems with task and chunk execution time, parallelism, and memory hierarchy utilization, forcing average programmers to rely on experts or use tedious trial-and-error tuning methods for performance. We present grain graphs, a new OpenMP performance analysis method that visualizes grains --- computation performed by a task or a parallel for-loop chunk instance --- and highlights problems such as low parallelism, work inflation and poor parallelization benefit at the grain level. We demonstrate that grain graphs can quickly reveal performance problems that are difficult to detect and characterize in fine detail using existing visualizations in standard OpenMP programs, simplifying OpenMP performance analysis. 
This enables average programmers to make portable optimizations for poor performing OpenMP programs, reducing pressure on experts and removing the need for tedious trial-and-error tuning.", acknowledgement = ack-nhfb, articleno = "28", fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "PPoPP '16 conference proceedings.", } @Misc{Munshi:2016:OCS, author = "Aaftab Munshi and Lee Howes and Bartosz Sochacki and {Khronos OpenCL Working Group}", title = "The {OpenCL} {C} Specification Version: 2.0 Document Revision: 33", howpublished = "Web document.", pages = "205", day = "13", month = apr, year = "2016", bibdate = "Mon Apr 16 14:05:49 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/elefunt.bib; https://www.math.utah.edu/pub/tex/bib/fparith.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://www.khronos.org/registry/OpenCL/specs/opencl-2.0-openclc.pdf", acknowledgement = ack-nhfb, remark = "Section 6.1.3.2 Math Functions, pages 74ff, defines a function repertoire extended beyond that of ISO C, including {\tt acospi}, {\tt asinpi}, {\tt atanpi}, {\tt atan2pi}, {\tt cospi}, {\tt fract}, {\tt lgamma\_r}, {\tt mad} (approximation to {\tt a * b + c}), {\tt minmag}, {\tt pown}, {\tt rootn}, {\tt sincos}, {\tt sinpi}, and {\tt tanpi}.", } @Article{Nadal-Serrano:2016:PSC, author = "Jose M.
Nadal-Serrano and Marisa Lopez-Vallejo", title = "A Performance Study of {CUDA UVM} versus Manual Optimizations in a Real-World Setup: Application to a {Monte Carlo} Wave-Particle Event-Based Interaction Model", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "27", number = "6", pages = "1579--1588", month = jun, year = "2016", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/TPDS.2015.2463813", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Tue Jun 14 09:25:28 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://csdl.computer.org/csdl/trans/td/2016/06/07175058-abs.html", abstract-URL = "http://csdl.computer.org/csdl/trans/td/2016/06/07175058-abs.html", acknowledgement = ack-nhfb, journal-URL = "http://www.computer.org/tpds/archives.htm", } @Article{Naumenko:2016:ACT, author = "Mikhail A. Naumenko and Vyacheslav V. Samarin", title = "Application of {CUDA} technology to calculation of ground states of few-body nuclei by {Feynman}'s continual integrals method", journal = j-SUPERFRI, volume = "3", number = "2", pages = "80--95", month = "????", year = "2016", CODEN = "????", ISSN = "2409-6008 (print), 2313-8734 (electronic)", bibdate = "Sat Nov 11 07:15:27 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/superfri.bib", URL = "http://superfri.org/superfri/article/view/102", acknowledgement = ack-nhfb, fjournal = "Supercomputing Frontiers and Innovations", journal-URL = "http://superfri.org/superfri/issue/archive", } @Article{Nogueira:2016:BBW, author = "David Nogueira and Pedro Tomas and Nuno Roma", title = "{BowMapCL}: {Burrows--Wheeler} Mapping on Multiple Heterogeneous Accelerators", journal = j-TCBB, volume = "13", number = "5", pages = "926--938", month = sep, year = "2016", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2015.2495149", ISSN = "1545-5963 (print), 
1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Dec 30 16:19:30 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "The computational demand of exact-search procedures has pressed the exploitation of parallel processing accelerators to reduce the execution time of many applications. However, this often imposes strict restrictions in terms of the problem size and implementation efforts, mainly due to their possibly distinct architectures. To circumvent this limitation, a new exact-search alignment tool BowMapCL based on the Burrows--Wheeler Transform and FM-Index is presented. Contrasting to other alternatives, BowMapCL is based on a unified implementation using OpenCL, allowing the exploitation of multiple and possibly different devices e.g., NVIDIA, AMD/ATI, and Intel GPUs/APUs. Furthermore, to efficiently exploit such heterogeneous architectures, BowMapCL incorporates several techniques to promote its performance and scalability, including multiple buffering, work-queue task-distribution, and dynamic load-balancing, together with index partitioning, bit-encoding, and sampling. When compared with state-of-the-art tools, the attained results showed that BowMapCL using a single GPU is $ 2 \times $ to $ 7.5 \times $ faster than mainstream multi-threaded CPU BWT-based aligners, like Bowtie, BWA, and SOAP2; and up to $ 4 \times $ faster than the best performing state-of-the-art GPU implementations namely, SOAP3 and HPG-BWT. When multiple and completely distinct devices are considered, BowMapCL efficiently scales the offered throughput, ensuring a convenient load-balance of the involved processing in the several distinct devices.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Oger:2016:DMM, author = "G. Oger and D. {Le Touz{\'e}} and D. 
Guibert and M. de Leffe and J. Biddiscombe and J. Soumagne and J.-G. Piccinali", title = "On distributed memory {MPI}-based parallelization of {SPH} codes in massive {HPC} context", journal = j-COMP-PHYS-COMM, volume = "200", number = "??", pages = "1--14", month = mar, year = "2016", CODEN = "CPHCBZ", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Thu Jan 21 15:04:34 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465515003070", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655/", } @Article{Otten:2016:MOI, author = "Matthew Otten and Jing Gong and Azamat Mametjanov and Aaron Vose and John Levesque and Paul Fischer and Misun Min", title = "An {MPI\slash OpenACC} implementation of a high-order electromagnetics solver with {GPUDirect} communication", journal = j-IJHPCA, volume = "30", number = "3", pages = "320--334", month = aug, year = "2016", CODEN = "IHPCFL", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Tue Apr 4 14:51:30 MDT 2017", bibsource = "http://hpc.sagepub.com/; https://www.math.utah.edu/pub/tex/bib/ijsa.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "International Journal of High Performance Computing Applications", journal-URL = "http://hpc.sagepub.com/content/by/year", } @Article{Pai:2016:CTO, author = "Sreepathi Pai and Keshav Pingali", title = "A compiler for throughput optimization of graph algorithms on {GPUs}", journal = j-SIGPLAN, volume = "51", number = "10", pages = "1--19", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984015", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 
MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Writing high-performance GPU implementations of graph algorithms can be challenging. In this paper, we argue that three optimizations called throughput optimizations are key to high-performance for this application class. These optimizations describe a large implementation space making it unrealistic for programmers to implement them by hand. To address this problem, we have implemented these optimizations in a compiler that produces CUDA code from an intermediate-level program representation called IrGL. Compared to state-of-the-art handwritten CUDA implementations of eight graph applications, code generated by the IrGL compiler is up to 5.95x times faster (median 1.4x) for five applications and never more than 30\% slower for the others. Throughput optimizations contribute an improvement up to 4.16x (median 1.4x) to the performance of unoptimized IrGL code.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "OOPSLA '16 conference proceedings.", } @Article{Pang:2016:MKR, author = "Yeyong Pang and Shaojun Wang and Yu Peng and Xiyuan Peng and Nicholas J. Fraser and Philip H. W. Leong", title = "A Microcoded Kernel Recursive Least Squares Processor Using {FPGA} Technology", journal = j-TRETS, volume = "10", number = "1", pages = "5:1--5:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2950061", ISSN = "1936-7406 (print), 1936-7414 (electronic)", ISSN-L = "1936-7406", bibdate = "Mon Apr 3 11:34:09 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/trets.bib", abstract = "Kernel methods utilize linear methods in a nonlinear feature space and combine the advantages of both. 
Online kernel methods, such as kernel recursive least squares (KRLS) and kernel normalized least mean squares (KNLMS), perform nonlinear regression in a recursive manner, with similar computational requirements to linear techniques. In this article, an architecture for a microcoded kernel method accelerator is described, and high-performance implementations of sliding-window KRLS, fixed-budget KRLS, and KNLMS are presented. The architecture utilizes pipelining and vectorization for performance, and microcoding for reusability. The design can be scaled to allow tradeoffs between capacity, performance, and area. The design is compared with a central processing unit (CPU), digital signal processor (DSP), and Altera OpenCL implementations. In different configurations on an Altera Arria 10 device, our SW-KRLS implementation delivers floating-point throughput of approximately 16 GFLOPs, latency of 5.5 $ \mu $ s, and energy consumption of $ 10^{- 4} $ J, these being improvements over a CPU by factors of 12, 17, and 24, respectively.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on Reconfigurable Technology and Systems (TRETS)", journal-URL = "http://portal.acm.org/toc.cfm?id=J1151", } @Article{Peraza:2016:PGQ, author = "Joshua Peraza and Ananta Tiwari and Michael Laurenzano and Laura Carrington and Allan Snavely", title = "{PMaC}'s green queue: a framework for selecting energy optimal {DVFS} configurations in large scale {MPI} applications", journal = j-CCPE, volume = "28", number = "2", pages = "211--231", month = feb, year = "2016", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.3184", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Tue Feb 9 06:13:21 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = 
"http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "27 Dec 2013", } @Article{Pirk:2016:VVA, author = "Holger Pirk and Oscar Moll and Matei Zaharia and Sam Madden", title = "{Voodoo} --- a vector algebra for portable database performance on modern hardware", journal = j-PROC-VLDB-ENDOWMENT, volume = "9", number = "14", pages = "1707--1718", month = oct, year = "2016", CODEN = "????", ISSN = "2150-8097", bibdate = "Wed Oct 12 10:14:56 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/vldbe.bib", abstract = "In-memory databases require careful tuning and many engineering tricks to achieve good performance. Such database performance engineering is hard: a plethora of data and hardware-dependent optimization techniques form a design space that is difficult to navigate for a skilled engineer --- even more so for a query compiler. To facilitate performance-oriented design exploration and query plan compilation, we present Voodoo, a declarative intermediate algebra that abstracts the detailed architectural properties of the hardware, such as multi- or many-core architectures, caches and SIMD registers, without losing the ability to generate highly tuned code. Because it consists of a collection of declarative, vector-oriented operations, Voodoo is easier to reason about and tune than low-level C and related hardware-focused extensions (Intrinsics, OpenCL, CUDA, etc.). This enables our Voodoo compiler to produce (OpenCL) code that rivals and even outperforms the fastest state-of-the-art in memory databases for both GPUs and CPUs. In addition, Voodoo makes it possible to express techniques as diverse as cache-conscious processing, predication and vectorization (again on both GPUs and CPUs) with just a few lines of code. 
Central to our approach is a novel idea we termed control vectors, which allows a code generating frontend to expose parallelism to the Voodoo compiler in an abstract manner, enabling portable performance across hardware platforms. We used Voodoo to build an alternative backend for MonetDB, a popular open-source in-memory database. Our backend allows MonetDB to perform at the same level as highly tuned in-memory databases, including HyPeR and Ocelot. We also demonstrate Voodoo's usefulness when investigating hardware conscious tuning techniques, assessing their performance on different queries, devices and data.", acknowledgement = ack-nhfb, fjournal = "Proceedings of the VLDB Endowment", journal-URL = "http://portal.acm.org/citation.cfm?id=J1174", } @Article{Prabhakar:2016:GCH, author = "Raghu Prabhakar and David Koeplinger and Kevin J. Brown and HyoukJoong Lee and Christopher {De Sa} and Christos Kozyrakis and Kunle Olukotun", title = "Generating Configurable Hardware from Parallel Patterns", journal = j-SIGPLAN, volume = "51", number = "4", pages = "651--665", month = apr, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2954679.2872415", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Thu Jun 9 17:13:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "In recent years the computing landscape has seen an increasing shift towards specialized accelerators. Field programmable gate arrays (FPGAs) are particularly promising for the implementation of these accelerators, as they offer significant performance and energy improvements over CPUs for a wide class of applications and are far more flexible than fixed-function ASICs. However, FPGAs are difficult to program.
Traditional programming models for reconfigurable logic use low-level hardware description languages like Verilog and VHDL, which have none of the productivity features of modern software languages but produce very efficient designs, and low-level software languages like C and OpenCL coupled with high-level synthesis (HLS) tools that typically produce designs that are far less efficient. Functional languages with parallel patterns are a better fit for hardware generation because they provide high-level abstractions to programmers with little experience in hardware design and avoid many of the problems faced when generating hardware from imperative languages. In this paper, we identify two important optimizations for using parallel patterns to generate efficient hardware: tiling and metapipelining. We present a general representation of tiled parallel patterns, and provide rules for automatically tiling patterns and generating metapipelines. We demonstrate experimentally that these optimizations result in speedups up to 39.4$ \times $ on a set of benchmarks from the data analytics domain.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "ASPLOS '16 conference proceedings.", } @Article{Prades:2016:CAX, author = "Javier Prades and Carlos Rea{\~n}o and Federico Silla", title = "{CUDA} acceleration for {Xen} virtual machines in {InfiniBand} clusters with {rCUDA}", journal = j-SIGPLAN, volume = "51", number = "8", pages = "35:1--35:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851181", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Many data centers currently use virtual machines (VMs) to achieve a more efficient usage of hardware 
resources. However, current virtualization solutions, such as Xen, do not easily provide graphics processing unit (GPU) accelerators to applications running in the virtualized domain with the flexibility usually required in data centers (i.e., managing virtual GPU instances and concurrently sharing them among several VMs). Remote GPU virtualization frameworks such as the rCUDA solution may address this problem. In this work we analyze the use of the rCUDA framework to accelerate scientific applications running inside Xen VMs. Results show that the use of the rCUDA framework is a feasible approach, featuring a very low overhead if an InfiniBand fabric is already present in the cluster.", acknowledgement = ack-nhfb, articleno = "35", fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "PPoPP '16 conference proceedings.", } @Article{Rehman:2016:VMJ, author = "Waqas Ur Rehman and Muhammad Sohaib Ayub and Junaid Haroon Siddiqui", title = "Verification of {MPI} {Java} programs using software model checking", journal = j-SIGPLAN, volume = "51", number = "8", pages = "55:1--55:??", month = aug, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3016078.2851192", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Development of concurrent software requires the programmer to be aware of non-determinism, data races, and deadlocks. MPI (message passing interface) is a popular standard for writing message oriented distributed applications. Some messages in MPI systems can be processed by one of the many machines and in many possible orders. This non-determinism can affect the result of an MPI application. The alternate results may or may not be correct. 
To verify MPI applications, we need to check all these possible orderings and use an application specific oracle to decide if these orderings give correct output. MPJ Express is an open source Java implementation of the MPI standard. We developed a Java based model of MPJ Express, where processes are modeled as threads, and which can run unmodified MPI Java programs on a single system. This enabled us to adapt the Java PathFinder explicit state software model checker (JPF) using a custom listener to verify our model running real MPI Java programs. We evaluated our approach using small examples where model checking revealed message orders that would result in incorrect system behavior.", acknowledgement = ack-nhfb, articleno = "55", fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "PPoPP '16 conference proceedings.", } @Article{Rico-Gallego:2016:EIL, author = "Juan-Antonio Rico-Gallego and Juan-Carlos D{\'\i}az-Mart{\'\i}n and Alexey L. 
Lastovetsky", title = "Extending {$ \tau $}-Lop to model concurrent {MPI} communications in multicore clusters", journal = j-FUT-GEN-COMP-SYS, volume = "61", number = "??", pages = "66--82", month = aug, year = "2016", CODEN = "FGSEVI", ISSN = "0167-739X (print), 1872-7115 (electronic)", ISSN-L = "0167-739X", bibdate = "Wed Apr 27 09:38:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/futgencompsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167739X16300346", acknowledgement = ack-nhfb, fjournal = "Future Generation Computer Systems", journal-URL = "http://www.sciencedirect.com/science/journal/0167739X/", } @Article{Sandes:2016:CIS, author = "Edans Flavius de Oliveira Sandes and Guillermo Miranda and Xavier Martorell and Eduard Ayguade and George Teodoro and Alba Cristina Magalhaes Melo", title = "{CUDAlign 4.0}: Incremental Speculative Traceback for Exact Chromosome-Wide Alignment in {GPU} Clusters", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "27", number = "10", pages = "2838--2850", month = oct, year = "2016", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/TPDS.2016.2515597", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Tue Sep 13 06:32:59 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://www.computer.org/csdl/trans/td/2016/10/07374729-abs.html", abstract-URL = "https://www.computer.org/csdl/trans/td/2016/10/07374729-abs.html", acknowledgement = ack-nhfb, journal-URL = "http://www.computer.org/tpds/archives.htm", } @Article{Sandes:2016:MMA, author = "Edans F. De O. Sandes and Guillermo Miranda and Xavier Martorell and Eduard Ayguade and George Teodoro and Alba C. M. A. 
{De Melo}", title = "{MASA}: a Multiplatform Architecture for Sequence Aligners with Block Pruning", journal = j-TOPC, volume = "2", number = "4", pages = "28:1--28:??", month = mar, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2858656", ISSN = "2329-4949 (print), 2329-4957 (electronic)", ISSN-L = "2329-4949", bibdate = "Sat Mar 19 08:11:13 MDT 2016", bibsource = "http://topc.acm.org/; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/topc.bib", abstract = "Biological sequence alignment is a very popular application in Bioinformatics, used routinely worldwide. Many implementations of biological sequence alignment algorithms have been proposed for multicores, GPUs, FPGAs and CellBEs. These implementations are platform-specific; porting them to other systems requires considerable programming effort. This article proposes and evaluates MASA, a flexible and customizable software architecture that enables the execution of biological sequence alignment applications with three variants (local, global, and semiglobal) in multiple hardware/software platforms with block pruning, which is able to reduce significantly the amount of data processed. To attain our flexibility goals, we also propose a generic version of block pruning and developed multiple parallelization strategies as building blocks, including a new asynchronous dataflow-based parallelization, which may be combined to implement efficient aligners in different platforms. We provide four MASA aligner implementations for multicores (OmpSs and OpenMP), GPU (CUDA), and Intel Phi (OpenMP), showing that MASA is very flexible. 
The evaluation of our generic block pruning strategy shows that it significantly outperforms the previously proposed block pruning, being able to prune up to 66.5\% of the cells when using the new dataflow-based parallelization strategy.", acknowledgement = ack-nhfb, articleno = "28", fjournal = "ACM Transactions on Parallel Computing", journal-URL = "http://dl.acm.org/citation.cfm?id=2632163", remark = "Special Issue on PPoPP'14 conference.", } @Article{Sataric:2016:HOM, author = "Bogdan Satari{\'c} and Vladimir Slavni{\'c} and Aleksandar Beli{\'c} and Antun Balaz and Paulsamy Muruganandam and Sadhan K. Adhikari", title = "Hybrid {OpenMP\slash MPI} programs for solving the time-dependent {Gross--Pitaevskii} equation in a fully anisotropic trap", journal = j-COMP-PHYS-COMM, volume = "200", number = "??", pages = "411--417", month = mar, year = "2016", CODEN = "CPHCBZ", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Thu Jan 21 15:04:34 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465515004440", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655/", } @Article{Schenck:2016:EPM, author = "Wolfram Schenck and Salem {El Sayed} and Maciej Foszczynski and Wilhelm Homberg and Dirk Pleiter", title = "Evaluation and Performance Modeling of a Burst Buffer Solution", journal = j-OPER-SYS-REV, volume = "50", number = "3", pages = "12--26", month = dec, year = "2016", CODEN = "OSRED8", DOI = "https://doi.org/10.1145/3041710.3041714", ISSN = "0163-5980 (print), 1943-586X (electronic)", ISSN-L = "0163-5980", bibdate = "Thu Feb 9 10:38:58 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/opersysrev.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "Hierarchical storage architectures 
are required to meet both, capacity and bandwidth requirements for future high-end storage architectures. In this paper we present the results of an evaluation of an emerging technology, DataDirect Networks' (DDN) Infinite Memory Engine (IME). IME allows to realize a fast buffer in front of a large capacity storage system. We collected benchmarking data with IOR and with the HPC application NEST. The IOR bandwidth results show how well network bandwidth towards such fast buffer can be exploited compared to the external storage system. The NEST benchmarks clearly demonstrate that IME can reduce I/O-induced load imbalance between MPI ranks to a minimum while speeding up I/O as a whole by a considerable factor. In addition to these direct measurements, a performance model for NEST is developed. In combination with a generic and abstract burst buffer architecture, this model generates predictions about appropriate burst buffer and I/O parameters to achieve specific performance goals for NEST on HPC clusters of varying size. 
Specifically, it is investigated in which parameter range burst buffers are able to counteract the widening performance gap between compute and I/O.", acknowledgement = ack-nhfb, fjournal = "Operating Systems Review", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J597", } @Article{Soldado:2016:ECM, author = "F{\'a}bio Soldado and Fernando Alexandre and Herv{\'e} Paulino", title = "Execution of compound multi-kernel {OpenCL} computations in {multi-CPU\slash multi-GPU} environments", journal = j-CCPE, volume = "28", number = "3", pages = "768--787", day = "10", month = mar, year = "2016", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.3612", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Tue Feb 9 06:13:22 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "28 Aug 2015", } @Article{Sorensen:2016:EER, author = "Tyler Sorensen and Alastair F. Donaldson", title = "Exposing errors related to weak memory in {GPU} applications", journal = j-SIGPLAN, volume = "51", number = "6", pages = "100--113", month = jun, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/2980983.2908114", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Mon Sep 5 07:32:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present the systematic design of a testing environment that uses stressing and fuzzing to reveal errors in GPU applications that arise due to weak memory effects. We evaluate our approach on seven GPUs spanning three Nvidia architectures, across ten CUDA applications that use fine-grained concurrency. 
Our results show that applications that rarely or never exhibit errors related to weak memory when executed natively can readily exhibit these errors when executed in our testing environment. Our testing environment also provides a means to help identify the root causes of such errors, and automatically suggests how to insert fences that harden an application against weak memory bugs. To understand the cost of GPU fences, we benchmark applications with fences provided by the hardening strategy as well as a more conservative, sound fencing strategy.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "PLDI '16 conference proceedings.", } @Article{Sorensen:2016:PIW, author = "Tyler Sorensen and Alastair F. Donaldson and Mark Batty and Ganesh Gopalakrishnan and Zvonimir Rakamari{\'c}", title = "Portable inter-workgroup barrier synchronisation for {GPUs}", journal = j-SIGPLAN, volume = "51", number = "10", pages = "39--58", month = oct, year = "2016", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3022671.2984032", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Despite the growing popularity of GPGPU programming, there is not yet a portable and formally-specified barrier that one can use to synchronise across workgroups. Moreover, the occupancy-bound execution model of GPUs breaks assumptions inherent in traditional software execution barriers, exposing them to deadlock. We present an occupancy discovery protocol that dynamically discovers a safe estimate of the occupancy for a given GPU and kernel, allowing for a starvation-free (and hence, deadlock-free) inter-workgroup barrier by restricting the number of workgroups according to this estimate. 
We implement this idea by adapting an existing, previously non-portable, GPU inter-workgroup barrier to use OpenCL 2.0 atomic operations, and prove that the barrier meets its natural specification in terms of synchronisation. We assess the portability of our approach over eight GPUs spanning four vendors, comparing the performance of our method against alternative methods. Our key findings include: (1)~the recall of our discovery protocol is nearly 100\%; (2)~runtime comparisons vary substantially across GPUs and applications; and (3)~our method provides portable and safe inter-workgroup synchronisation across the applications we study.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "OOPSLA '16 conference proceedings.", } @Article{Tampouratzis:2016:AIH, author = "Nikolaos Tampouratzis and Pavlos M. Mattheakis and Ioannis Papaefstathiou", title = "Accelerating Intercommunication in Highly Parallel Systems", journal = j-TACO, volume = "13", number = "4", pages = "40:1--40:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/3005717", ISSN = "1544-3566 (print), 1544-3973 (electronic)", ISSN-L = "1544-3566", bibdate = "Wed Dec 28 16:24:46 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/taco.bib", abstract = "Every HPC system consists of numerous processing nodes interconnected using a number of different inter-process communication protocols such as Message Passing Interface (MPI) and Global Arrays (GA). Traditionally, research has focused on optimizing these protocols and identifying the most suitable ones for each system and/or application. Recently, there has been a proposal to unify the primitive operations of the different inter-processor communication protocols through the Portals library.
Portals offer a set of low-level communication routines which can be composed in order to implement the functionality of different intercommunication protocols. However, Portals modularity comes at a performance cost, since it adds one more layer in the actual protocol implementation. This work aims at closing the performance gap between a generic and reusable intercommunication layer, such as Portals, and the several monolithic and highly optimized intercommunication protocols. This is achieved through the development of a novel hardware offload engine efficiently implementing the basic Portals' modules. Our innovative system is up to two orders of magnitude faster than the conventional software implementation of Portals while the speedup achieved over the conventional monolithic software implementations of MPI and GAs is more than an order of magnitude. The power consumption of our hardware system is less than 1/100th of what a low-power CPU consumes when executing the Portals software while its silicon cost is less than 1/10th of that of a very simple RISC CPU. Moreover, our design process is also innovative since we have first modeled the hardware within an untimed virtual prototype which allowed for rapid design space exploration; then we applied a novel methodology to transform the untimed description into an efficient timed hardware description, which was then transformed into a hardware netlist through a High-Level Synthesis (HLS) tool.", acknowledgement = ack-nhfb, articleno = "40", fjournal = "ACM Transactions on Architecture and Code Optimization (TACO)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J924", } @Article{Tang:2016:AKM, author = "Qing Y. Tang and Mohammed A. S.
Khalid", title = "Acceleration of {$k$}-Means Algorithm Using {Altera SDK} for {OpenCL}", journal = j-TRETS, volume = "10", number = "1", pages = "6:1--6:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2964910", ISSN = "1936-7406 (print), 1936-7414 (electronic)", ISSN-L = "1936-7406", bibdate = "Mon Apr 3 11:34:09 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/trets.bib", abstract = "A K-means clustering algorithm involves partitioning of data iteratively into k clusters. It is one of the most popular data-mining algorithms [Wu et al. 2007], and is widely used in other applications, such as image processing and machine learning. However, k-means is highly time-consuming when data or cluster size is large. Traditionally, FPGAs have shown great promise for accelerating computationally intensive algorithms, but they are harder to use for acceleration if we rely on traditional HDL-based design methods. The recent introduction of Altera SDK for the OpenCL high-level synthesis tool allows developers to utilize FPGA's potential without long development periods and extensive hardware knowledge. This article presents an optimized implementation of a k-means clustering algorithm on an FPGA using Altera SDK for OpenCL. Performance and power consumption is measured with various data, cluster, and dimension sizes. When compared to state-of-the-art solutions, this implementation supports larger cluster sizes, offers up to 21x speed over a CPU and is more power efficient than a GPU.
Unlike previous implementations, it can deliver consistently high throughput across large or small feature dimensions given reasonable cluster sizes and large enough data size.", acknowledgement = ack-nhfb, articleno = "6", fjournal = "ACM Transactions on Reconfigurable Technology and Systems (TRETS)", journal-URL = "http://portal.acm.org/toc.cfm?id=J1151", } @Article{Vega-Gisbert:2016:DIJ, author = "Oscar Vega-Gisbert and Jose E. Roman and Jeffrey M. Squyres", title = "Design and implementation of {Java} bindings in {Open MPI}", journal = j-PARALLEL-COMPUTING, volume = "59", number = "??", pages = "1--20", month = nov, year = "2016", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Sat Nov 26 12:06:01 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819116300758", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191/", } @Article{Wang:2016:LLA, author = "Jin Wang and Norm Rubin and Albert Sidelnik and Sudhakar Yalamanchili", title = "{LaPerm}: locality aware scheduler for dynamic parallelism on {GPUs}", journal = j-COMP-ARCH-NEWS, volume = "44", number = "3", pages = "583--595", month = jun, year = "2016", CODEN = "CANED2", DOI = "https://doi.org/10.1145/3007787.3001199", ISSN = "0163-5964 (print), 1943-5851 (electronic)", ISSN-L = "0163-5964", bibdate = "Thu Jan 12 18:43:43 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigarch.bib", abstract = "Recent developments in GPU execution models and architectures have introduced dynamic parallelism to facilitate the execution of irregular applications where control flow and memory behavior can be unstructured, time-varying, and hierarchical. 
The changes brought about by this extension to the traditional bulk synchronous parallel (BSP) model also creates new challenges in exploiting the current GPU memory hierarchy. One of the major challenges is that the reference locality that exists between the parent and child thread blocks (TBs) created during dynamic nested kernel and thread block launches cannot be fully leveraged using the current TB scheduling strategies. These strategies were designed for the current implementations of the BSP model but fall short when dynamic parallelism is introduced since they are oblivious to the hierarchical reference locality. We propose LaPerm, a new locality-aware TB scheduler that exploits such parent-child locality, both spatial and temporal. LaPerm adopts three different scheduling decisions to (i) prioritize the execution of the child TBs, (ii) bind them to the stream multiprocessors (SMXs) occupied by their parents TBs, and (iii) maintain workload balance across compute units. Experiments with a set of irregular CUDA applications executed on a cycle-level simulator employing dynamic parallelism demonstrate that LaPerm is able to achieve an average of 27\% performance improvement over the baseline round-robin TB scheduler commonly used in modern GPUs.", acknowledgement = ack-nhfb, fjournal = "ACM SIGARCH Computer Architecture News", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J89", remark = "ISCA '16 conference proceedings.", } @Article{Wang:2016:MMF, author = "Zeke Wang and Shuhao Zhang and Bingsheng He and Wei Zhang", title = "{Melia}: A {MapReduce} Framework on {OpenCL}-Based {FPGAs}", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "27", number = "12", pages = "3547--3560", month = dec, year = "2016", CODEN = "ITDSEO", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Wed Nov 16 18:43:09 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib; 
https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://www.computer.org/csdl/trans/td/2016/12/07425227-abs.html", acknowledgement = ack-nhfb, journal-URL = "http://www.computer.org/tpds/archives.htm", } @Article{Witchel:2016:PPW, author = "Emmett Witchel", title = "Programmer Productivity in a World of Mushy Interfaces: Challenges of the Post-{ISA} Reality", journal = j-OPER-SYS-REV, volume = "50", number = "2", pages = "591--591", month = jun, year = "2016", CODEN = "OSRED8", DOI = "https://doi.org/10.1145/2954680.2876511", ISSN = "0163-5980 (print), 1943-586X (electronic)", ISSN-L = "0163-5980", bibdate = "Thu Jun 9 17:03:34 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/opersysrev.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "Since 1964, we had the notion that the instruction set architecture (ISA) is a useful and fairly opaque abstraction layer between hardware and software. Software rode hardware's performance wave while remaining gloriously oblivious to hardware's growing complexity. Unfortunately, the jig is up. We still have ISAs, but the abstraction no longer offers seamless portability---parallel software needs to be tuned for different core counts, and heterogeneous processing elements (CPUs, GPUs, accelerators) further complicate programmability. We are better at building large-scale heterogeneous processors than we are at programming them. Maintaining software across multiple current platforms is difficult and porting to future platforms is also difficult. There have been many technical responses: virtual ISAs (e.g., NVIDIA's PTX), higher-level programming interfaces (e.g., CUDA or OpenCL), and late-stage compilation and platform-specific tailoring (e.g., Android ART), etc. 
A team of opinionated experts, drawn from the three ASPLOS communities, will examine the problem of programmer productivity in the post-ISA world, first from the perspective of their area of expertise and then noting the contributions from the other two communities. What research will save us and how? This wide-ranging debate will frame important research areas for future work while being grounded in frank discussion about what has succeeded in the past. Attendees can expect actionable insight into important research issues as well as an entertaining discussion.", acknowledgement = ack-nhfb, fjournal = "Operating Systems Review", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J597", } @Article{Yang:2016:HTM, author = "Fan Yang and Jinfeng Li and James Cheng", title = "{Husky}: towards a more efficient and expressive distributed computing framework", journal = j-PROC-VLDB-ENDOWMENT, volume = "9", number = "5", pages = "420--431", month = jan, year = "2016", CODEN = "????", ISSN = "2150-8097", bibdate = "Mon Jan 11 17:54:24 MST 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/vldbe.bib", abstract = "Finding efficient, expressive and yet intuitive programming models for data-parallel computing system is an important and open problem. Systems like Hadoop and Spark have been widely adopted for massive data processing, as coarse-grained primitives like map and reduce are succinct and easy to master. However, sometimes over-simplified API hinders programmers from more fine-grained control and designing more efficient algorithms. Developers may have to resort to sophisticated domain-specific languages (DSLs), or even low-level layers like MPI, but this raises development cost---learning many mutually exclusive systems prolongs the development schedule, and the use of low-level tools may result in bug-prone programming.
This motivated us to start the Husky open-source project, which is an attempt to strike a better balance between high performance and low development cost. Husky is developed mainly for in-memory large scale data mining, and also serves as a general research platform for designing efficient distributed algorithms. We show that many existing frameworks can be easily implemented and bridged together inside Husky, and Husky is able to achieve similar or even better performance compared with domain-specific systems.", acknowledgement = ack-nhfb, fjournal = "Proceedings of the VLDB Endowment", journal-URL = "http://portal.acm.org/citation.cfm?id=J1174", } @Article{Young-S:2016:OFP, author = "Luis E. Young-S. and Dusan Vudragovi{\'c} and Paulsamy Muruganandam and Sadhan K. Adhikari and Antun Balaz", title = "{OpenMP Fortran} and {C} programs for solving the time-dependent {Gross--Pitaevskii} equation in an anisotropic trap", journal = j-COMP-PHYS-COMM, volume = "204", number = "??", pages = "209--213", month = jul, year = "2016", CODEN = "CPHCBZ", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Fri May 13 19:25:21 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/fortran2.bib; https://www.math.utah.edu/pub/tex/bib/gnu.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S001046551630073X", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655/", } @Article{Zaza:2016:CBP, author = "Ayham Zaza and Abeeb A. Awotunde and Faisal A. Fairag and Mayez A. 
Al-Mouhamed", title = "A {CUDA} based parallel multi-phase oil reservoir simulator", journal = j-COMP-PHYS-COMM, volume = "206", number = "??", pages = "2--16", month = sep, year = "2016", CODEN = "CPHCBZ", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Fri Jun 10 18:27:25 MDT 2016", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465516300996", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655/", } @Article{Agullo:2017:BGB, author = "Emmanuel Agullo and Olivier Aumage and Berenger Bramas and Olivier Coulaud and Samuel Pitoiset", title = "Bridging the Gap Between {OpenMP} and Task-Based Runtime Systems for the {Fast Multipole Method}", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "28", number = "10", pages = "2794--2807", month = oct, year = "2017", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/TPDS.2017.2697857", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Thu Oct 12 06:58:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/bibnet/subjects/fastmultipole.bib; https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://www.computer.org/csdl/trans/td/2017/10/07912335-abs.html", acknowledgement = ack-nhfb, journal-URL = "http://www.computer.org/tpds/archives.htm", } @Article{Al-Refaie:2017:PAH, author = "Ahmed F. 
Al-Refaie and Jonathan Tennyson", title = "A parallel algorithm for {Hamiltonian} matrix construction in electron-molecule collision calculations: {MPI--SCATCI}", journal = j-COMP-PHYS-COMM, volume = "221", number = "??", pages = "53--62", month = dec, year = "2017", CODEN = "CPHCBZ", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Mon Oct 16 14:20:16 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465517302436", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Al-Refaie:2017:PCT, author = "Ahmed F. Al-Refaie and Sergei N. Yurchenko and Jonathan Tennyson", title = "{{\bf G}PU {\bf A}ccelerated {\bf IN}tensities MPI (GAIN-MPI)}: a new method of computing {Einstein-$A$} coefficients", journal = j-COMP-PHYS-COMM, volume = "214", number = "??", pages = "216--224", month = may, year = "2017", CODEN = "CPHCBZ", DOI = "https://doi.org/10.1016/j.cpc.2017.01.013", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Fri Mar 3 06:05:58 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465517300255", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Aliaga:2017:CTP, author = "Jos{\'e} I. Aliaga and Mar{\'\i}a Barreda and Goran Flegar and Matthias Bollh{\"o}fer and Enrique S. 
Quintana-Ort{\'\i}", title = "Communication in task-parallel {ILU}-preconditioned {CG} solvers using {MPI + OmpSs}", journal = j-CCPE, volume = "29", number = "21", pages = "??--??", day = "10", month = nov, year = "2017", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.4280", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Sat Dec 30 09:11:58 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", } @Article{Alvanos:2017:PMM, author = "Michail Alvanos and Theodoros Christoudias", title = "\pkg{MEDINA}: {MECCA} Development in Accelerators --- {KPP Fortran} to {CUDA} source-to-source Pre-processor", journal = j-J-OPEN-RES-SOFT, volume = "5", number = "1", pages = "13--??", day = "28", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.5334/jors.158", ISSN = "2049-9647", ISSN-L = "2049-9647", bibdate = "Sat Sep 8 10:03:50 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/fortran3.bib; https://www.math.utah.edu/pub/tex/bib/jors.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://openresearchsoftware.metajnl.com/articles/10.5334/jors.158/", acknowledgement = ack-nhfb, fjournal = "Journal of Open Research Software", journal-URL = "https://openresearchsoftware.metajnl.com/issue/archive/", } @Article{Anderson:2017:BGB, author = "Michael Anderson and Shaden Smith and Narayanan Sundaram and Mihai Capota and Zheguang Zhao and Subramanya Dulloor and Nadathur Satish and Theodore L. 
Willke", title = "Bridging the gap between {HPC} and big data frameworks", journal = j-PROC-VLDB-ENDOWMENT, volume = "10", number = "8", pages = "901--912", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.14778/3090163.3090168", ISSN = "2150-8097", bibdate = "Fri Jun 23 17:12:46 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/vldbe.bib", abstract = "Apache Spark is a popular framework for data analytics with attractive features such as fault tolerance and interoperability with the Hadoop ecosystem. Unfortunately, many analytics operations in Spark are an order of magnitude or more slower compared to native implementations written with high performance computing tools such as MPI. There is a need to bridge the performance gap while retaining the benefits of the Spark ecosystem such as availability, productivity, and fault tolerance. In this paper, we propose a system for integrating MPI with Spark and analyze the costs and benefits of doing so for four distributed graph and machine learning applications. We show that offloading computation to an MPI environment from within Spark provides 3.1--17.7$ \times $ speedups on the four sparse applications, including all of the overheads. This opens up an avenue to reuse existing MPI libraries in Spark with little effort.", acknowledgement = ack-nhfb, fjournal = "Proceedings of the VLDB Endowment", journal-URL = "http://portal.acm.org/citation.cfm?id=J1174", } @Article{Arteaga:2017:GFG, author = "Jaime Arteaga and St{\'e}phane Zuckerman and Guang R. 
Gao", title = "Generating Fine-Grain Multithreaded Applications Using a Multigrain Approach", journal = j-TACO, volume = "14", number = "4", pages = "47:1--47:??", month = dec, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3155288", ISSN = "1544-3566 (print), 1544-3973 (electronic)", ISSN-L = "1544-3566", bibdate = "Fri Dec 22 18:25:55 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/taco.bib", abstract = "The recent evolution in hardware landscape, aimed at producing high-performance computing systems capable of reaching extreme-scale performance, has reignited the interest in fine-grain multithreading, particularly at the intranode level. Indeed, popular parallel programming environments, such as OpenMP, which features a simple interface for the parallelization of programs, are now incorporating fine-grain constructs. However, since coarse-grain directives are still heavily used, the OpenMP runtime is forced to support both coarse- and fine-grain models of execution, potentially reducing the advantages obtained when executing an application in a fully fine-grain environment. To evaluate the type of applications that benefit from executing in a unified fine-grain program execution model, this article presents a multigrain parallel programming environment for the generation of fine-grain multithreaded applications from programs featuring OpenMP's API, allowing OpenMP programs to be run on top of a fine-grain event-driven program execution model. 
Experimental results with five scientific benchmarks show that fine-grain applications, generated by and run on our environment with two runtimes implementing a fine-grain event-driven program execution model, are competitive and can outperform their OpenMP counterparts, especially for data-intensive workloads with irregular and dynamic parallelism, reaching speedups as high as 2.6$ \times $ for Graph500 and 51$ \times $ for NAS Data Cube.", acknowledgement = ack-nhfb, articleno = "47", fjournal = "ACM Transactions on Architecture and Code Optimization (TACO)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J924", } @Article{Awan:2017:CCD, author = "Ammar Ahmad Awan and Khaled Hamidouche and Jahanzeb Maqbool Hashmi and Dhabaleswar K. Panda", title = "{S-Caffe}: Co-designing {MPI} Runtimes and {Caffe} for Scalable Deep Learning on Modern {GPU} Clusters", journal = j-SIGPLAN, volume = "52", number = "8", pages = "193--205", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3018769", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Availability of large data sets like ImageNet and massively parallel computation support in modern HPC devices like NVIDIA GPUs have fueled a renewed interest in Deep Learning (DL) algorithms. This has triggered the development of DL frameworks like Caffe, Torch, TensorFlow, and CNTK. However, most DL frameworks have been limited to a single node. In order to scale out DL frameworks and bring HPC capabilities to the DL arena, we propose, S-Caffe; a scalable and distributed Caffe adaptation for modern multi-GPU clusters. 
With an in-depth analysis of new requirements brought forward by the DL frameworks and limitations of current communication runtimes, we present a co-design of the Caffe framework and the MVAPICH2-GDR MPI runtime. Using the co-design methodology, we modify Caffe's workflow to maximize the overlap of computation and communication with multi-stage data propagation and gradient aggregation schemes. We bring DL-Awareness to the MPI runtime by proposing a hierarchical reduction design that benefits from CUDA-Aware features and provides up to a massive 133x speedup over OpenMPI and 2.6x speedup over MVAPICH2 for 160 GPUs. S-Caffe successfully scales up to 160 K-80 GPUs for GoogLeNet (ImageNet) with a speedup of 2.5x over 32 GPUs. To the best of our knowledge, this is the first framework that scales up to 160 GPUs. Furthermore, even for single node training, S-Caffe shows an improvement of 14\% and 9\% over Nvidia's optimized Caffe for 8 and 16 GPUs, respectively. In addition, S-Caffe achieves up to 1395 samples per second for the AlexNet model, which is comparable to the performance of Microsoft CNTK.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "PPoPP '17 conference proceedings.", } @Article{Bae:2017:SEF, author = "Seung-Hee Bae and Daniel Halperin and Jevin D. 
West and Martin Rosvall and Bill Howe", title = "Scalable and Efficient Flow-Based Community Detection for Large-Scale Graph Analysis", journal = j-TKDD, volume = "11", number = "3", pages = "32:1--32:??", month = apr, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/2992785", ISSN = "1556-4681 (print), 1556-472X (electronic)", ISSN-L = "1556-4681", bibdate = "Mon Jul 24 17:32:52 MDT 2017", bibsource = "http://www.acm.org/pubs/contents/journals/tkdd/; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/tkdd.bib", abstract = "Community detection is an increasingly popular approach to uncover important structures in large networks. Flow-based community detection methods rely on communication patterns of the network rather than structural properties to determine communities. The Infomap algorithm in particular optimizes a novel objective function called the map equation and has been shown to outperform other approaches in third-party benchmarks. However, Infomap and its variants are inherently sequential, limiting their use for large-scale graphs. In this article, we propose a novel algorithm to optimize the map equation called RelaxMap. RelaxMap provides two important improvements over Infomap: parallelization, so that the map equation can be optimized over much larger graphs, and prioritization, so that the most important work occurs first, iterations take less time, and the algorithm converges faster. We implement these techniques using OpenMP on shared-memory multicore systems, and evaluate our approach on a variety of graphs from standard graph clustering benchmarks as well as real graph datasets. Our evaluation shows that both techniques are effective: RelaxMap achieves 70\% parallel efficiency on eight cores, and prioritization improves algorithm performance by an additional 20--50\% on average, depending on the graph properties. 
Additionally, RelaxMap converges in the similar number of iterations and provides solutions of equivalent quality as the serial Infomap implementation.", acknowledgement = ack-nhfb, articleno = "32", fjournal = "ACM Transactions on Knowledge Discovery from Data (TKDD)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J1054", } @Article{Barthels:2017:DJA, author = "Claude Barthels and Ingo M{\"u}ller and Timo Schneider and Gustavo Alonso and Torsten Hoefler", title = "Distributed join algorithms on thousands of cores", journal = j-PROC-VLDB-ENDOWMENT, volume = "10", number = "5", pages = "517--528", month = jan, year = "2017", CODEN = "????", ISSN = "2150-8097", bibdate = "Sat Feb 25 09:01:51 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/vldbe.bib", abstract = "Traditional database operators such as joins are relevant not only in the context of database engines but also as a building block in many computational and machine learning algorithms. With the advent of big data, there is an increasing demand for efficient join algorithms that can scale with the input data size and the available hardware resources. In this paper, we explore the implementation of distributed join algorithms in systems with several thousand cores connected by a low-latency network as used in high performance computing systems or data centers. We compare radix hash join to sort-merge join algorithms and discuss their implementation at this scale. In the paper, we explain how to use MPI to implement joins, show the impact and advantages of RDMA, discuss the importance of network scheduling, and study the relative performance of sorting vs. hashing. 
The experimental results show that the algorithms we present scale well with the number of cores, reaching a throughput of 48.7 billion input tuples per second on 4,096 cores.", acknowledgement = ack-nhfb, fjournal = "Proceedings of the VLDB Endowment", journal-URL = "http://portal.acm.org/citation.cfm?id=J1174", } @Article{Bonelli:2017:MCA, author = "Francesco Bonelli and Michele Tuttafesta and Gianpiero Colonna and Luigi Cutrone and Giuseppe Pascazio", title = "An {MPI--CUDA} approach for hypersonic flows with detailed state-to-state air kinetics using a {GPU} cluster", journal = j-COMP-PHYS-COMM, volume = "219", number = "??", pages = "178--195", month = oct, year = "2017", CODEN = "CPHCBZ", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Wed Jul 26 06:22:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465517301613", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Bruel:2017:ACC, author = "Pedro Bruel and Marcos Amar{\'\i}s and Alfredo Goldman", title = "Autotuning {CUDA} compiler parameters for heterogeneous applications using the {OpenTuner} framework", journal = j-CCPE, volume = "29", number = "22", pages = "??--??", day = "25", month = nov, year = "2017", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.3973", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Sat Dec 30 09:11:59 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", } @Article{Carpen-Amarie:2017:EOC, author = "Alexandra Carpen-Amarie and 
Sascha Hunold and Jesper Larsson Tr{\"a}ff", title = "On expected and observed communication performance with {MPI} derived datatypes", journal = j-PARALLEL-COMPUTING, volume = "69", number = "??", pages = "98--117", month = nov, year = "2017", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Tue Oct 24 15:15:02 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819117301217", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Celik:2017:BET, author = "Ahmet Celik and Sreepathi Pai and Sarfraz Khurshid and Milos Gligoric", title = "Bounded exhaustive test-input generation on {GPUs}", journal = j-PACMPL, volume = "1", number = "OOPSLA", pages = "94:1--94:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3133918", ISSN = "2475-1421", bibdate = "Wed Jan 10 09:45:26 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/pacmpl.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, articleno = "94", fjournal = "Proceedings of the ACM on Programming Languages", journal-URL = "https://pacmpl.acm.org/", } @Article{Chabbi:2017:EAL, author = "Milind Chabbi and Abdelhalim Amer and Shasha Wen and Xu Liu", title = "An Efficient Abortable-locking Protocol for Multi-level {NUMA} Systems", journal = j-SIGPLAN, volume = "52", number = "8", pages = "61--74", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3018768", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "The popularity of Non-Uniform 
Memory Access (NUMA) architectures has led to numerous locality-preserving hierarchical lock designs, such as HCLH, HMCS, and cohort locks. Locality-preserving locks trade fairness for higher throughput. Hence, some instances of acquisitions can incur long latencies, which may be intolerable for certain applications. Few locks admit a waiting thread to abandon its protocol on a timeout. State-of-the-art abortable locks are not fully locality aware, introduce high overheads, and unsuitable for frequent aborts. Enhancing locality-aware locks with lightweight timeout capability is critical for their adoption. In this paper, we design and evaluate the HMCS-T lock, a Hierarchical MCS (HMCS) lock variant that admits a timeout. HMCS-T maintains the locality benefits of HMCS while ensuring aborts to be lightweight. HMCS-T offers the progress guarantee missing in most abortable queuing locks. Our evaluations show that HMCS-T offers the timeout feature at a moderate overhead over its HMCS analog. HMCS-T, used in an MPI runtime lock, mitigated the poor scalability of an MPI+OpenMP BFS code and resulted in 4.3x superior scaling.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "PPoPP '17 conference proceedings.", } @Article{Chen:2017:AAG, author = "Jian Chen and Russell M. 
Clapp", title = "{Astro}: Auto-Generation of Synthetic Traces Using Scaling Pattern Recognition for {MPI} Workloads", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "28", number = "8", pages = "2159--2171", month = aug, year = "2017", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/TPDS.2017.2649518", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Tue Jul 25 18:46:21 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://www.computer.org/csdl/trans/td/2017/08/07809142-abs.html", acknowledgement = ack-nhfb, journal-URL = "http://www.computer.org/tpds/archives.htm", } @Article{Cornelis:2017:HAV, author = "Jan G. Cornelis and Jan Lemeire and Tim Bruylants and Peter Schelkens", title = "Heterogeneous acceleration of volumetric {JPEG 2000} using {OpenCL}", journal = j-IJHPCA, volume = "31", number = "3", pages = "229--245", year = "2017", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342016646438", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Tue Nov 6 06:13:05 MST 2018", bibsource = "http://hpc.sagepub.com/; https://www.math.utah.edu/pub/tex/bib/ijsa.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://journals.sagepub.com/doi/full/10.1177/1094342016646438", acknowledgement = ack-nhfb, fjournal = "International Journal of High Performance Computing Applications", journal-URL = "http://hpc.sagepub.com/content/by/year", xxmonth = may, } @Article{Dang:2017:ECB, author = "Hoang-Vu Dang and Marc Snir and William Gropp", title = "Eliminating contention bottlenecks in multithreaded {MPI}", journal = j-PARALLEL-COMPUTING, volume = "69", number = "??", pages = "1--23", month = nov, year = "2017", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Tue Oct 24 15:15:02 MDT 2017", bibsource = 
"https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819117301187", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Dashti:2017:AMM, author = "Mohammad Dashti and Alexandra Fedorova", title = "Analyzing memory management methods on integrated {CPU--GPU} systems", journal = j-SIGPLAN, volume = "52", number = "9", pages = "59--69", month = sep, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3156685.3092256", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:13 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Heterogeneous systems that integrate a multicore CPU and a GPU on the same die are ubiquitous. On these systems, both the CPU and GPU share the same physical memory as opposed to using separate memory dies. Although integration eliminates the need to copy data between the CPU and the GPU, arranging transparent memory sharing between the two devices can carry large overheads. Memory on CPU/GPU systems is typically managed by a software framework such as OpenCL or CUDA, which includes a runtime library, and communicates with a GPU driver. These frameworks offer a range of memory management methods that vary in ease of use, consistency guarantees and performance. In this study, we analyze some of the common memory management methods of the most widely used software frameworks for heterogeneous systems: CUDA, OpenCL 1.2, OpenCL 2.0, and HSA, on NVIDIA and AMD hardware. 
We focus on performance/functionality trade-offs, with the goal of exposing their performance impact and simplifying the choice of memory management methods for programmers.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "ISMM '17 conference proceedings.", } @Article{deAndrade:2017:OFH, author = "Douglas Coimbra de Andrade and Lu{\'\i}s Gonzaga Trabasso", title = "An {OpenCL} framework for high performance extraction of image features", journal = j-J-PAR-DIST-COMP, volume = "109", number = "??", pages = "75--88", month = nov, year = "2017", CODEN = "JPDCER", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Sat Aug 19 13:10:32 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0743731517301624", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Article{Degomme:2017:SMA, author = "Augustin Degomme and Arnaud Legrand and George S. 
Markomanolis and Martin Quinson and Mark Stillwell and Frederic Suter", title = "Simulating {MPI} Applications: The {SMPI} Approach", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "28", number = "8", pages = "2387--2400", month = aug, year = "2017", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/TPDS.2017.2669305", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Tue Jul 25 18:46:21 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://www.computer.org/csdl/trans/td/2017/08/07855780-abs.html", acknowledgement = ack-nhfb, journal-URL = "http://www.computer.org/tpds/archives.htm", } @Article{Diavastos:2017:SLR, author = "Andreas Diavastos and Pedro Trancoso", title = "{SWITCHES}: a Lightweight Runtime for Dataflow Execution of Tasks on Many-Cores", journal = j-TACO, volume = "14", number = "3", pages = "31:1--31:??", month = sep, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3127068", ISSN = "1544-3566 (print), 1544-3973 (electronic)", ISSN-L = "1544-3566", bibdate = "Wed Sep 6 17:12:05 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/taco.bib", abstract = "SWITCHES is a task-based dataflow runtime that implements a lightweight distributed triggering system for runtime dependence resolution and uses static scheduling and compile-time assignment policies to reduce runtime overheads. Unlike other systems, the granularity of loop-tasks can be increased to favor data-locality, even when having dependences across different loops. SWITCHES introduces explicit task resource allocation mechanisms for efficient allocation of resources and adopts the latest OpenMP Application Programming Interface (API), as to maintain high levels of programming productivity. It provides a source-to-source tool that automatically produces thread-based code. 
Performance on an Intel Xeon-Phi shows good scalability and surpasses OpenMP by an average of 32\%.",
  acknowledgement = ack-nhfb,
  articleno =    "31",
  fjournal =     "ACM Transactions on Architecture and Code Optimization (TACO)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J924",
}

@Article{Dietrich:2017:CBA,
  author =       "Robert Dietrich and Felix Schmitt and Alexander Grund and Jonas Stolle",
  title =        "Critical-blame analysis for {OpenMP 4.0} offloading on {Intel Xeon Phi}",
  journal =      j-J-SYST-SOFTW,
  volume =       "125",
  number =       "??",
  pages =        "381--388",
  month =        mar,
  year =         "2017",
  CODEN =        "JSSODM",
  ISSN =         "0164-1212 (print), 1873-1228 (electronic)",
  ISSN-L =       "0164-1212",
  bibdate =      "Sat Feb 4 12:20:39 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jsystsoftw.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0164121215002940",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Systems and Software",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01641212/",
}

@Article{Eizenberg:2017:BBL,
  author =       "Ariel Eizenberg and Yuanfeng Peng and Toma Pigli and William Mansky and Joseph Devietti",
  title =        "{BARRACUDA}: binary-level analysis of runtime {RAces} in {CUDA} programs",
  journal =      j-SIGPLAN,
  volume =       "52",
  number =       "6",
  pages =        "126--140",
  month =        jun,
  year =         "2017",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/3140587.3062342",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Sat Sep 16 10:18:17 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "GPU programming models enable and encourage massively parallel programming with over a million threads, requiring extreme parallelism to achieve good performance.
Massive parallelism brings significant correctness challenges by increasing the possibility for bugs as the number of thread interleavings balloons. Conventional dynamic safety analyses struggle to run at this scale. We present BARRACUDA, a concurrency bug detector for GPU programs written in Nvidia's CUDA language. BARRACUDA handles a wider range of parallelism constructs than previous work, including branch operations, low-level atomics and memory fences, which allows BARRACUDA to detect new classes of concurrency bugs. BARRACUDA operates at the binary level for increased compatibility with existing code, leveraging a new binary instrumentation framework that is extensible to other dynamic analyses. BARRACUDA incorporates a number of novel optimizations that are crucial for scaling concurrency bug detection to over a million threads.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "PLDI '17 conference proceedings.", } @Article{Fachada:2017:CCF, author = "Nuno Fachada and Vitor V. Lopes and Rui C. Martins and Agostinho C. Rosa", title = "{\tt cf4ocl}: a {C} framework for {OpenCL}", journal = j-SCI-COMPUT-PROGRAM, volume = "143", number = "??", pages = "9--19", day = "1", month = sep, year = "2017", CODEN = "SCPGD4", ISSN = "0167-6423 (print), 1872-7964 (electronic)", ISSN-L = "0167-6423", bibdate = "Wed Jul 26 05:56:44 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/scicomputprogram.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167642317300540", acknowledgement = ack-nhfb, fjournal = "Science of Computer Programming", journal-URL = "http://www.sciencedirect.com/science/journal/01676423", } @Article{Falch:2017:MLB, author = "Thomas L. Falch and Anne C. 
Elster",
  title =        "Machine learning-based auto-tuning for enhanced performance portability of {OpenCL} applications",
  journal =      j-CCPE,
  volume =       "29",
  number =       "8",
  pages =        "??--??",
  day =          "25",
  month =        apr,
  year =         "2017",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.4029",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Fri Mar 31 19:12:52 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
}

@Article{Falch:2017:RAM,
  author =       "Thomas L. Falch and Anne C. Elster",
  title =        "Machine learning-based auto-tuning for enhanced performance portability of {OpenCL} applications",
  journal =      j-CCPE,
  volume =       "29",
  number =       "8",
  pages =        "??--??",
  day =          "25",
  month =        apr,
  year =         "2017",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.4029",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Fri Mar 31 19:12:52 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
  remark =       "Duplicate of entry Falch:2017:MLB (identical bibliographic data, same DOI); this key is retained only so that existing citations continue to resolve. Prefer Falch:2017:MLB in new documents.",
}

@Article{Fan:2017:SEE,
  author =       "Xing Fan and Mostafa Mehrabi and Oliver Sinnen and Nasser Giacaman",
  title =        "Supporting Enhanced Exception Handling with {OpenMP} in Object--Oriented Languages",
  journal =      j-INT-J-PARALLEL-PROG,
  volume =       "45",
  number =       "6",
  pages =        "1366--1389",
  month =        dec,
  year =         "2017",
  CODEN =        "IJPPE5",
  DOI =          "https://doi.org/10.1007/s10766-016-0474-x",
  ISSN =         "0885-7458 (print), 1573-7640 (electronic)",
  ISSN-L =       "0885-7458",
  bibdate =      "Sat Nov 18 09:27:28 MST 2017",
  bibsource =    "http://link.springer.com/journal/10766/45/6;
https://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of Parallel Programming",
  journal-URL =  "http://link.springer.com/journal/10766",
}

@Article{Forejt:2017:PPA,
  author =       "Vojt{\v{e}}ch Forejt and Saurabh Joshi and Daniel Kroening and Ganesh Narayanaswamy and Subodh Sharma",
  title =        "Precise Predictive Analysis for Discovering Communication Deadlocks in {MPI} Programs",
  journal =      j-TOPLAS,
  volume =       "39",
  number =       "4",
  pages =        "15:1--15:??",
  month =        sep,
  year =         "2017",
  CODEN =        "ATPSDT",
  DOI =          "https://doi.org/10.1145/3095075",
  ISSN =         "0164-0925 (print), 1558-4593 (electronic)",
  ISSN-L =       "0164-0925",
  bibdate =      "Tue Sep 19 06:38:32 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/toplas/; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/toplas.bib",
  abstract =     "The Message Passing Interface (MPI) is the standard API for parallelization in high-performance and scientific computing. Communication deadlocks are a frequent problem in MPI programs, and this article addresses the problem of discovering such deadlocks. We begin by showing that if an MPI program is single path, the problem of discovering communication deadlocks is NP-complete. We then present a novel propositional encoding scheme that captures the existence of communication deadlocks. The encoding is based on modeling executions with partial orders and implemented in a tool called MOPPER. The tool executes an MPI program, collects the trace, builds a formula from the trace using the propositional encoding scheme, and checks its satisfiability.
Finally, we present experimental results that quantify the benefit of the approach in comparison to other analyzers and demonstrate that it offers a scalable solution for single-path programs.", acknowledgement = ack-nhfb, articleno = "15", fjournal = "ACM Transactions on Programming Languages and Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J783", } @Article{Frust:2017:RDP, author = "Tobias Frust and Michael Wagner and Jan Stephan and Guido Juckeland and Andr{\'e} Bieberle", title = "Rapid data processing for ultrafast {X}-ray computed tomography using scalable and modular {CUDA} based pipelines", journal = j-COMP-PHYS-COMM, volume = "219", number = "??", pages = "353--360", month = oct, year = "2017", CODEN = "CPHCBZ", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Wed Jul 26 06:22:13 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465517301674", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Fumero:2017:JTG, author = "Juan Fumero and Michel Steuwer and Lukas Stadler and Christophe Dubach", title = "Just-In-Time {GPU} Compilation for Interpreted Languages with Partial Evaluation", journal = j-SIGPLAN, volume = "52", number = "7", pages = "60--73", month = jul, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140607.3050761", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Computer systems are increasingly featuring powerful parallel devices with the advent of many-core CPUs and GPUs. 
This offers the opportunity to solve computationally-intensive problems at a fraction of the time traditional CPUs need. However, exploiting heterogeneous hardware requires the use of low-level programming language approaches such as OpenCL, which is incredibly challenging, even for advanced programmers. On the application side, interpreted dynamic languages are increasingly becoming popular in many domains due to their simplicity, expressiveness and flexibility. However, this creates a wide gap between the high-level abstractions offered to programmers and the low-level hardware-specific interface. Currently, programmers must rely on high performance libraries or they are forced to write parts of their application in a low-level language like OpenCL. Ideally, nonexpert programmers should be able to exploit heterogeneous hardware directly from their interpreted dynamic languages. In this paper, we present a technique to transparently and automatically offload computations from interpreted dynamic languages to heterogeneous devices. Using just-in-time compilation, we automatically generate OpenCL code at runtime which is specialized to the actual observed data types using profiling information. We demonstrate our technique using R, which is a popular interpreted dynamic language predominately used in big data analytic. Our experimental results show the execution on a GPU yields speedups of over 150x compared to the sequential FastR implementation and the obtained performance is competitive with manually written GPU code. We also show that when taking into account start-up time, large speedups are achievable, even when the applications run for as little as a few seconds.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "VEE '17 conference proceedings.", } @Article{Germanas:2017:HUP, author = "D. Germanas and A. Stepsys and S. Mickevicius and R. K. 
Kalinauskas", title = "{HOTB} update: Parallel code for calculation of three- and four-particle harmonic oscillator transformation brackets and their matrices using {OpenMP}", journal = j-COMP-PHYS-COMM, volume = "215", number = "??", pages = "259--264", month = jun, year = "2017", CODEN = "CPHCBZ", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Fri Mar 31 15:52:48 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465517300401", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Ghose:2017:FOT, author = "Anirban Ghose and Lokesh Dokara and Soumyajit Dey and Pabitra Mitra", title = "A Framework for {OpenCL} Task Scheduling on Heterogeneous Multicores", journal = j-PARALLEL-PROCESS-LETT, volume = "27", number = "3--4", pages = "1750008", year = "2017", CODEN = "PPLTEE", DOI = "https://doi.org/10.1142/S0129626417500086", ISSN = "0129-6264 (print), 1793-642X (electronic)", ISSN-L = "0129-6264", bibdate = "Tue May 29 09:05:31 MDT 2018", bibsource = "http://ejournals.wspc.com.sg/ppl/; https://www.math.utah.edu/pub/tex/bib/parallelprocesslett.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Parallel Processing Letters", journal-URL = "http://www.worldscientific.com/loi/ppl", } @Article{Gonzalez-Alvarez:2017:HMO, author = "David L. Gonz{\'a}lez-{\'A}lvarez and Miguel A. 
Vega-Rodr{\'\i}guez and {\'A}lvaro Rubio-Largo", title = "A hybrid {MPI\slash OpenMP} parallel implementation of {NSGA--II} for finding patterns in protein sequences", journal = j-J-SUPERCOMPUTING, volume = "73", number = "6", pages = "2285--2312", month = jun, year = "2017", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-016-1916-3", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Sat Jun 24 10:31:33 MDT 2017", bibsource = "http://link.springer.com/journal/11227/73/6; https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Article{Grosset:2017:TTT, author = "A. V. Pascal Grosset and Manasa Prasad and Cameron Christensen and Aaron Knoll and Charles Hansen", title = "{TOD}-Tree: Task-Overlapped Direct Send Tree Image Compositing for Hybrid {MPI} Parallelism and {GPUs}", journal = j-IEEE-TRANS-VIS-COMPUT-GRAPH, volume = "23", number = "6", pages = "1677--1690", month = jun, year = "2017", CODEN = "ITVGEA", DOI = "https://doi.org/10.1109/TVCG.2016.2542069", ISSN = "1077-2626 (print), 1941-0506 (electronic), 2160-9306", ISSN-L = "1077-2626", bibdate = "Thu Jun 29 18:38:25 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetransviscomputgraph.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://www.computer.org/csdl/trans/tg/2017/06/07433468-abs.html", acknowledgement = ack-nhfb, journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=2945", } @Article{Han:2017:SLS, author = "Yiming Han and Anthony T. 
Chronopoulos", title = "Scalable Loop Self-scheduling Schemes for Large-Scale Clusters and Cloud Systems", journal = j-INT-J-PARALLEL-PROG, volume = "45", number = "3", pages = "595--611", month = jun, year = "2017", CODEN = "IJPPE5", DOI = "https://doi.org/10.1007/s10766-016-0434-5", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Sat Jun 24 11:37:59 MDT 2017", bibsource = "http://link.springer.com/journal/10766/45/3; https://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", } @Article{Haque:2017:CCL, author = "S. Anisul Haque and X. Li and F. Mansouri and M. Moreno Maza and D. Mohajerani and W. Pan", title = "{CUMODP}: a {CUDA} library for modular polynomial computation", journal = j-ACM-COMM-COMP-ALGEBRA, volume = "51", number = "3", pages = "89--91", month = sep, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3177795.3177799", ISSN = "1932-2232 (print), 1932-2240 (electronic)", ISSN-L = "1932-2232", bibdate = "Fri Jan 5 06:22:51 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigsam.bib", abstract = "The CUDA Modular Polynomial (CUMODP) Library implements arithmetic operations for dense matrices and dense polynomials, primarily with modular integer coefficients. Some operations are available for integer or floating point coefficients. Similar to other software libraries, like CuBLAS$^1$ targeting Graphics Processing Units (GPUs), CUMODP focuses on efficiency-critical routines and provides them in the form of device functions and CUDA kernels. Hence, these routines are primarily designed to offer GPU support to polynomial system solvers. A bivariate system solver is part of the library, as a proof-of-concept. 
Its implementation is presented in [10] and it is integrated in Maple's Triangularize command$^2$, since the release 18 of Maple.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Communications in Computer Algebra",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1000",
}

@Article{Hasanov:2017:HRC,
  author =       "Khalid Hasanov and Alexey Lastovetsky",
  title =        "Hierarchical redesign of classic {MPI} reduction algorithms",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "73",
  number =       "2",
  pages =        "713--725",
  month =        feb,
  year =         "2017",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-016-1779-7",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Sat Jun 24 10:31:32 MDT 2017",
  bibsource =    "http://link.springer.com/journal/11227/73/2; https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
}

@Article{Henriksen:2017:FPF,
  author =       "Troels Henriksen and Niels G. W. Serup and Martin Elsman and Fritz Henglein and Cosmin E. Oancea",
  title =        "{Futhark}: purely functional {GPU-programming} with nested parallelism and in-place array updates",
  journal =      j-SIGPLAN,
  volume =       "52",
  number =       "6",
  pages =        "556--571",
  month =        jun,
  year =         "2017",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/3140587.3062354",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Sat Sep 16 10:18:17 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "Futhark is a purely functional data-parallel array language that offers a machine-neutral programming model and an optimising compiler that generates OpenCL code for GPUs. This paper presents the design and implementation of three key features of Futhark that seek a suitable middle ground with imperative approaches.
First, in order to express efficient code inside the parallel constructs, we introduce a simple type system for in-place updates that ensures referential transparency and supports equational reasoning. Second, we furnish Futhark with parallel operators capable of expressing efficient strength-reduced code, along with their fusion rules. Third, we present a flattening transformation aimed at enhancing the degree of parallelism that (i) builds on loop interchange and distribution but uses higher-order reasoning rather than array-dependence analysis, and (ii) still allows further locality-of-reference optimisations. Finally, an evaluation on 16 benchmarks demonstrates the impact of the language and compiler features and shows application-level performance competitive with hand-written GPU code.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "PLDI '17 conference proceedings.",
}

@Article{Jan:2017:ITF,
  author =       "Bilal Jan and Fiaz Gul Khan and Bartolomeo Montrucchio and Anthony Theodore Chronopoulos and Shahaboddin Shamshirband and Abdul Nasir Khan",
  title =        "Introducing {ToPe--FFT}: An {OpenCL}-based {FFT} library targeting {GPUs}",
  journal =      j-CCPE,
  volume =       "29",
  number =       "21",
  pages =        "??--??",
  day =          "10",
  month =        nov,
  year =         "2017",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.4256",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Sat Dec 30 09:11:58 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
}

@Article{Jarzabek:2017:PEU,
  author =       "Lukasz Jarzabek and Pawel Czarnul",
  title =        "Performance evaluation of unified memory and dynamic parallelism for selected parallel {CUDA} applications",
  journal =
j-J-SUPERCOMPUTING,
  volume =       "73",
  number =       "12",
  pages =        "5378--5401",
  month =        dec,
  year =         "2017",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-017-2091-x",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Sat Jan 6 08:59:18 MST 2018",
  bibsource =    "http://link.springer.com/journal/11227/73/12; https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/content/pdf/10.1007/s11227-017-2091-x.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
}

@Article{Jatala:2017:SSG,
  author =       "Vishwesh Jatala and Jayvant Anantpur and Amey Karkare",
  title =        "Scratchpad Sharing in {GPUs}",
  journal =      j-TACO,
  volume =       "14",
  number =       "2",
  pages =        "15:1--15:??",
  month =        jul,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3075619",
  ISSN =         "1544-3566 (print), 1544-3973 (electronic)",
  ISSN-L =       "1544-3566",
  bibdate =      "Mon Jul 24 18:00:59 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/taco.bib",
  abstract =     "General-Purpose Graphics Processing Unit (GPGPU) applications exploit on-chip scratchpad memory available in the Graphics Processing Units (GPUs) to improve performance. The amount of thread level parallelism (TLP) present in the GPU is limited by the number of resident threads, which in turn depends on the availability of scratchpad memory in its streaming multiprocessor (SM). Since the scratchpad memory is allocated at thread block granularity, part of the memory may remain unutilized. In this article, we propose architectural and compiler optimizations to improve the scratchpad memory utilization. Our approach, called Scratchpad Sharing, addresses scratchpad under-utilization by launching additional thread blocks in each SM.
These thread blocks use unutilized scratchpad memory and also share scratchpad memory with other resident blocks. To improve the performance of scratchpad sharing, we propose Owner Warp First (OWF) scheduling that schedules warps from the additional thread blocks effectively. The performance of this approach, however, is limited by the availability of the part of scratchpad memory that is shared among thread blocks. We propose compiler optimizations to improve the availability of shared scratchpad memory. We describe an allocation scheme that helps in allocating scratchpad variables such that shared scratchpad is accessed for short duration. We introduce a new hardware instruction, relssp, that when executed releases the shared scratchpad memory. Finally, we describe an analysis for optimal placement of relssp instructions, such that shared scratchpad memory is released as early as possible, but only after its last use, along every execution path. We implemented the hardware changes required for scratchpad sharing and the relssp instruction using the GPGPU-Sim simulator and implemented the compiler optimizations in Ocelot framework. We evaluated the effectiveness of our approach on 19 kernels from 3 benchmarks suites: CUDA-SDK, GPGPU-Sim, and Rodinia.
The kernels that under-utilize scratchpad memory show an average improvement of 19\% and maximum improvement of 92.17\% in terms of the number of instruction executed per cycle when compared to the baseline approach, without affecting the performance of the kernels that are not limited by scratchpad memory.",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "ACM Transactions on Architecture and Code Optimization (TACO)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J924",
}

@Article{Jo:2017:PMA,
  author =       "Gangwon Jo and Jaehoon Jung and Jiyoung Park and Jaejin Lee",
  title =        "{Poster}: {MAPA}: an Automatic Memory Access Pattern Analyzer for {GPU} Applications",
  journal =      j-SIGPLAN,
  volume =       "52",
  number =       "8",
  pages =        "443--444",
  month =        aug,
  year =         "2017",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/3155284.3019034",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Fri Dec 1 18:56:12 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "Various existing optimization and memory consistency management techniques for GPU applications rely on memory access patterns of kernels. However, they suffer from poor practicality because they require explicit user interventions to extract kernel memory access patterns. This paper proposes an automatic memory-access-pattern analysis framework called MAPA. MAPA is based on a source-level analysis technique derived from traditional symbolic analyses and a run-time pattern selection technique. The experimental results show that MAPA properly analyzes 116 real-world OpenCL kernels from Rodinia and Parboil.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "PPoPP '17 conference proceedings.",
}

@Article{Julian-Moreno:2017:FPA,
  author =       "Guillermo Juli{\'a}n-Moreno and Jorge E.
L{\'o}pez de Vergara and Iv{\'a}n Gonz{\'a}lez and Luis de Pedro and Javier Royuela-del-Val and Federico Simmross-Wattenberg",
  title =        "Fast parallel $ \alpha $-stable distribution function evaluation and parameter estimation using {OpenCL} in {GPGPUs}",
  journal =      j-STAT-COMPUT,
  volume =       "27",
  number =       "5",
  pages =        "1365--1382",
  month =        sep,
  year =         "2017",
  CODEN =        "STACE3",
  ISSN =         "0960-3174 (print), 1573-1375 (electronic)",
  ISSN-L =       "0960-3174",
  bibdate =      "Thu Jun 8 18:03:56 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/statcomput.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Statistics and Computing",
  journal-URL =  "http://link.springer.com/journal/11222",
}

@Article{Katouda:2017:MOH,
  author =       "Michio Katouda and Takahito Nakajima",
  title =        "{MPI\slash OpenMP} hybrid parallel algorithm for resolution of identity second-order {M{\o}ller--Plesset} perturbation calculation of analytical energy gradient for massively parallel multicore supercomputers",
  journal =      j-J-COMPUT-CHEM,
  volume =       "38",
  number =       "8",
  pages =        "489--507",
  day =          "30",
  month =        mar,
  year =         "2017",
  CODEN =        "JCCHDD",
  DOI =          "https://doi.org/10.1002/jcc.24701",
  ISSN =         "0192-8651 (print), 1096-987X (electronic)",
  ISSN-L =       "0192-8651",
  bibdate =      "Mon Feb 20 11:51:05 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jcomputchem2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Computational Chemistry",
  journal-URL =  "http://www.interscience.wiley.com/jpages/0192-8651",
}

@Article{Khan:2017:RCS,
  author =       "Ayaz H. Khan and Mayez Al-Mouhamed and Muhammed Al-Mulhem and Adel F.
Ahmed",
  title =        "{RT-CUDA}: A Software Tool for {CUDA} Code Restructuring",
  journal =      j-INT-J-PARALLEL-PROG,
  volume =       "45",
  number =       "3",
  pages =        "551--594",
  month =        jun,
  year =         "2017",
  CODEN =        "IJPPE5",
  DOI =          "https://doi.org/10.1007/s10766-016-0433-6",
  ISSN =         "0885-7458 (print), 1573-7640 (electronic)",
  ISSN-L =       "0885-7458",
  bibdate =      "Sat Jun 24 11:37:59 MDT 2017",
  bibsource =    "http://link.springer.com/journal/10766/45/3; https://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of Parallel Programming",
  journal-URL =  "http://link.springer.com/journal/10766",
}

@Article{Kojima:2017:HLG,
  author =       "Kensuke Kojima and Atsushi Igarashi",
  title =        "A {Hoare} Logic for {GPU} Kernels",
  journal =      j-TOCL,
  volume =       "18",
  number =       "1",
  pages =        "3:1--3:??",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3001834",
  ISSN =         "1529-3785 (print), 1557-945X (electronic)",
  ISSN-L =       "1529-3785",
  bibdate =      "Thu Apr 13 17:53:54 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocl/; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/tocl.bib",
  abstract =     "We study a Hoare Logic to reason about parallel programs executed on graphics processing units (GPUs), called GPU kernels. During the execution of GPU kernels, multiple threads execute in lockstep, that is, execute the same instruction simultaneously. When the control branches, the two branches are executed sequentially, but during the execution of each branch only those threads that take it are enabled; after the control converges, all the threads are enabled and again execute in lockstep.
In this article, we first consider a semantics in which all threads execute in lockstep (this semantics simplifies the actual execution model of GPUs) and adapt Hoare Logic to this setting by augmenting the usual Hoare triples with an additional component representing the set of enabled threads. It is determined that the soundness and relative completeness of the logic do not hold for all programs; a difficulty arises from the fact that one thread can invalidate the loop termination condition of another thread through shared memory. We overcome this difficulty by identifying an appropriate class of programs for which the soundness and relative completeness hold. Additionally, we discuss thread interleaving, which is present in the actual execution of GPUs but not in the lockstep semantics mentioned above. We show that if a program is race free, then the lockstep and interleaving semantics produce the same result. This implies that our logic is sound and relatively complete for race-free programs, even if the thread interleaving is taken into account.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on Computational Logic",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J773",
}

@Article{Kotselidis:2017:HMR,
  author =       "Christos Kotselidis and James Clarkson and Andrey Rodchenko and Andy Nisbet and John Mawer and Mikel Luj{\'a}n",
  title =        "Heterogeneous Managed Runtime Systems: a Computer Vision Case Study",
  journal =      j-SIGPLAN,
  volume =       "52",
  number =       "7",
  pages =        "74--82",
  month =        jul,
  year =         "2017",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/3140607.3050764",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Sat Sep 16 10:18:17 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "Real-time 3D space understanding is becoming prevalent across a wide range of applications and hardware
platforms. To meet the desired Quality of Service (QoS), computer vision applications tend to be heavily parallelized and exploit any available hardware accelerators. Current approaches to achieving real-time computer vision, evolve around programming languages typically associated with High Performance Computing along with binding extensions for OpenCL or CUDA execution. Such implementations, although high performing, lack portability across the wide range of diverse hardware resources and accelerators. In this paper, we showcase how a complex computer vision application can be implemented within a managed runtime system. We discuss the complexities of achieving high-performing and portable execution across embedded and desktop configurations. Furthermore, we demonstrate that it is possible to achieve the QoS target of over 30 frames per second (FPS) by exploiting FPGA and GPGPU acceleration transparently through the managed runtime system.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "VEE '17 conference proceedings.",
}

@Article{Kouetcha:2017:USP,
  author =       "Daniella Nguemalieu Kouetcha and Hamidr{\'e}za Ram{\'e}zani and Nathalie Cohaut",
  title =        "Ultrafast scalable parallel algorithm for the radial distribution function histogramming using {MPI} maps",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "73",
  number =       "4",
  pages =        "1629--1653",
  month =        apr,
  year =         "2017",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-016-1854-0",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Sat Jun 24 10:31:33 MDT 2017",
  bibsource =    "http://link.springer.com/journal/11227/73/4; https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
}

@Article{Li:2017:PCO,
  author =       "Shigang Li and Yunquan
Zhang and Torsten Hoefler",
  title =        "{Poster}: Cache-Oblivious {MPI} All-to-All Communications on Many-Core Architectures",
  journal =      j-SIGPLAN,
  volume =       "52",
  number =       "8",
  pages =        "445--446",
  month =        aug,
  year =         "2017",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/3155284.3019025",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Fri Dec 1 18:56:12 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "In the many-core era, the performance of MPI collectives is more dependent on the intra-node communication component. However, the communication algorithms generally inherit from the inter-node version and ignore the cache complexity. We propose cache-oblivious algorithms for MPI all-to-all operations, in which data blocks are copied into the receive buffers in Morton order to exploit data locality. Experimental results on different many-core architectures show that our cache-oblivious implementations significantly outperform the naive implementations based on shared heap and the highly optimized MPI libraries.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "PPoPP '17 conference proceedings.",
}

@Article{Losada:2017:ARV,
  author =       "Nuria Losada and Mar{\'\i}a J.
Mart{\'\i}n and Patricia Gonz{\'a}lez",
  title =        "Assessing resilient versus stop-and-restart fault-tolerant solutions in {MPI} applications",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "73",
  number =       "1",
  pages =        "316--329",
  month =        jan,
  year =         "2017",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-016-1863-z",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Sat Jun 24 10:31:31 MDT 2017",
  bibsource =    "http://link.springer.com/journal/11227/73/1; https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
}

@Article{Losada:2017:RMA,
  author =       "Nuria Losada and Iv{\'a}n Cores and Mar{\'\i}a J. Mart{\'\i}n and Patricia Gonz{\'a}lez",
  title =        "Resilient {MPI} applications using an application-level checkpointing framework and {ULFM}",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "73",
  number =       "1",
  pages =        "100--113",
  month =        jan,
  year =         "2017",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-016-1629-7",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Sat Jun 24 10:31:31 MDT 2017",
  bibsource =    "http://link.springer.com/journal/11227/73/1; https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
}

@Article{Maier:2017:OLD,
  author =       "Andrew J. Maier and Bruce F.
Cockburn",
  title =        "Optimization of Low-Density Parity Check decoder performance for {OpenCL} designs synthesized to {FPGAs}",
  journal =      j-J-PAR-DIST-COMP,
  volume =       "107",
  number =       "??",
  pages =        "134--145",
  month =        sep,
  year =         "2017",
  CODEN =        "JPDCER",
  ISSN =         "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L =       "0743-7315",
  bibdate =      "Sat Aug 19 13:10:31 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0743731517301004",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Parallel and Distributed Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/07437315",
}

@Article{Malakar:2017:DMO,
  author =       "Preeti Malakar and Venkatram Vishwanath",
  title =        "Data movement optimizations for independent {MPI} {I/O} on the {Blue Gene/Q}",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "61",
  number =       "??",
  pages =        "35--51",
  month =        jan,
  year =         "2017",
  CODEN =        "PACOEJ",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Sat Feb 4 08:48:35 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S016781911630062X",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191/",
}

@Article{Manwade:2017:DFA,
  author =       "Karveer B. Manwade and Dinesh B.
Kulkarni",
  title =        "Data Flow Analysis of {MPI} Program Using Dynamic Analysis Technique with Partial Execution",
  journal =      j-SCPE,
  volume =       "18",
  number =       "4",
  pages =        "375--385",
  month =        "????",
  year =         "2017",
  CODEN =        "????",
  ISSN =         "1895-1767",
  ISSN-L =       "1895-1767",
  bibdate =      "Mon Jan 7 06:46:49 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/scpe.bib",
  URL =          "https://www.scpe.org/index.php/scpe/article/view/1335",
  acknowledgement = ack-nhfb,
  fjournal =     "Scalable Computing: Practice and Experience",
  journal-URL =  "http://www.scpe.org/",
}

@Article{Marin:2017:ERF,
  author =       "Manuel Marin and David Defour and Federico Milano",
  title =        "An Efficient Representation Format for Fuzzy Intervals Based on Symmetric Membership Functions",
  journal =      j-TOMS,
  volume =       "43",
  number =       "3",
  pages =        "23:1--23:??",
  month =        jan,
  year =         "2017",
  CODEN =        "ACMSCU",
  DOI =          "https://doi.org/10.1145/2939364",
  ISSN =         "0098-3500 (print), 1557-7295 (electronic)",
  ISSN-L =       "0098-3500",
  bibdate =      "Wed Oct 4 10:55:07 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/toms.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=2939364",
  abstract =     "This article addresses the execution cost of arithmetic operations with a focus on fuzzy arithmetic. Thanks to an appropriate representation format for fuzzy intervals, we show that it is possible to halve the number of operations and divide by 2 to 8 the memory requirements compared to conventional solutions. In addition, we demonstrate the benefit of some hardware features encountered in today's accelerators (GPU) such as static rounding, memory usage, instruction-level parallelism (ILP), and thread-level parallelism (TLP). We then describe a library of fuzzy arithmetic operations written in CUDA and C++.
The library is evaluated against traditional approaches using compute-bound and memory-bound benchmarks on Nvidia GPUs, with an observed performance gain of 2 to 20.",
  acknowledgement = ack-nhfb,
  articleno =    "23",
  fjournal =     "ACM Transactions on Mathematical Software (TOMS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J782",
}

@Article{Matheou:2017:DDC,
  author =       "George Matheou and Paraskevas Evripidou",
  title =        "Data-Driven Concurrency for High Performance Computing",
  journal =      j-TACO,
  volume =       "14",
  number =       "4",
  pages =        "53:1--53:??",
  month =        dec,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3162014",
  ISSN =         "1544-3566 (print), 1544-3973 (electronic)",
  ISSN-L =       "1544-3566",
  bibdate =      "Fri Dec 22 18:25:55 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/taco.bib",
  abstract =     "In this work, we utilize dynamic dataflow/data-driven techniques to improve the performance of high performance computing (HPC) systems. The proposed techniques are implemented and evaluated through an efficient, portable, and robust programming framework that enables data-driven concurrency on HPC systems. The proposed framework is based on data-driven multithreading (DDM), a hybrid control-flow/dataflow model that schedules threads based on data availability on sequential processors. The proposed framework was evaluated using several benchmarks, with different characteristics, on two different systems: a 4-node AMD system with a total of 128 cores and a 64-node Intel HPC system with a total of 768 cores. The performance evaluation shows that the proposed framework scales well and tolerates scheduling overheads and memory latencies effectively. We also compare our framework to MPI, DDM-VM, and OmpSs@Cluster.
The comparison results show that the proposed framework obtains comparable or better performance.",
  acknowledgement = ack-nhfb,
  articleno =    "53",
  fjournal =     "ACM Transactions on Architecture and Code Optimization (TACO)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J924",
}

@Article{Meister:2017:PME,
  author =       "Oliver Meister and Kaveh Rahnema and Michael Bader",
  title =        "Parallel Memory-Efficient Adaptive Mesh Refinement on Structured Triangular Meshes with Billions of Grid Cells",
  journal =      j-TOMS,
  volume =       "43",
  number =       "3",
  pages =        "19:1--19:27",
  month =        jan,
  year =         "2017",
  CODEN =        "ACMSCU",
  DOI =          "https://doi.org/10.1145/2947668",
  ISSN =         "0098-3500 (print), 1557-7295 (electronic)",
  ISSN-L =       "0098-3500",
  bibdate =      "Wed Oct 4 10:55:07 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/toms.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=2947668",
  abstract =     "We present sam(oa)$^2$, a software package for a dynamically adaptive, parallel solution of 2D partial differential equations on triangular grids created via newest vertex bisection. An element order imposed by the Sierpinski space-filling curve provides an algorithm for grid generation, refinement, and traversal that is inherently memory efficient. Based purely on stack and stream data structures, it completely avoids random memory access. Using an element-oriented data view suitable for local operators, concrete simulation scenarios are implemented based on control loops and event hooks, which hide the complexity of the underlying traversal scheme. Two case studies are presented: two-phase flow in heterogeneous porous media and tsunami wave propagation, demonstrated on the Tohoku tsunami 2011 in Japan. sam(oa)$^2$ features hybrid MPI+OpenMP parallelization based on the Sierpinski order induced on the elements.
Sections defined by contiguous grid cells define atomic tasks for OpenMP work sharing and stealing, as well as for migration of grid cells between MPI processes. Using optimized communication and load balancing algorithms, sam(oa)$^2$ achieves 88\% strong scaling efficiency from 16 to 512 cores and 92\% efficiency in a weak scaling test on 8,192 cores with 10 billion elements---all tests including adaptive mesh refinement and load balancing in each time step.",
  acknowledgement = ack-nhfb,
  articleno =    "19",
  fjournal =     "ACM Transactions on Mathematical Software (TOMS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J782",
}

@Article{Mendonca:2017:DAA,
  author =       "Gleison Mendon{\c{c}}a and Breno Guimar{\~a}es and P{\'e}ricles Alves and M{\'a}rcio Pereira and Guido Ara{\'u}jo and Fernando Magno Quint{\~a}o Pereira",
  title =        "{DawnCC}: Automatic Annotation for Data Parallelism and Offloading",
  journal =      j-TACO,
  volume =       "14",
  number =       "2",
  pages =        "13:1--13:??",
  month =        jul,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3084540",
  ISSN =         "1544-3566 (print), 1544-3973 (electronic)",
  ISSN-L =       "1544-3566",
  bibdate =      "Mon Jul 24 18:00:59 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/taco.bib",
  abstract =     "Directive-based programming models, such as OpenACC and OpenMP, allow developers to convert a sequential program into a parallel one with minimum human intervention. However, inserting pragmas into production code is a difficult and error-prone task, often requiring familiarity with the target program. This difficulty restricts the ability of developers to annotate code that they have not written themselves. This article provides a suite of compiler-related methods to mitigate this problem.
Such techniques rely on symbolic range analysis, a well-known static technique, to achieve two purposes: populate source code with data transfer primitives and to disambiguate pointers that could hinder automatic parallelization due to aliasing. We have materialized our ideas into a tool, DawnCC, which can be used stand-alone or through an online interface. To demonstrate its effectiveness, we show how DawnCC can annotate the programs available in PolyBench without any intervention from users. Such annotations lead to speedups of over $ 100 \times $ in an Nvidia architecture and over $ 50 \times $ in an ARM architecture.",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "ACM Transactions on Architecture and Code Optimization (TACO)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J924",
}

@Article{Montella:2017:VCB,
  author =       "Raffaele Montella and Giulio Giunta and Giuliano Laccetti and Marco Lapegna and Carlo Palmieri and Carmine Ferraro and Valentina Pelliccia and Cheol-Ho Hong and Ivor Spence and Dimitrios S. Nikolopoulos",
  title =        "On the Virtualization of {CUDA} Based {GPU} Remoting on {ARM} and x86 Machines in the {GVirtuS} Framework",
  journal =      j-INT-J-PARALLEL-PROG,
  volume =       "45",
  number =       "5",
  pages =        "1142--1163",
  month =        oct,
  year =         "2017",
  CODEN =        "IJPPE5",
  DOI =          "https://doi.org/10.1007/s10766-016-0462-1",
  ISSN =         "0885-7458 (print), 1573-7640 (electronic)",
  ISSN-L =       "0885-7458",
  bibdate =      "Sat Nov 18 09:27:28 MST 2017",
  bibsource =    "http://link.springer.com/journal/10766/45/5; https://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of Parallel Programming",
  journal-URL =  "http://link.springer.com/journal/10766",
}

@Article{Moreira:2017:FCR,
  author =       "Rubens E. A.
Moreira and Sylvain Collange and Fernando Magno Quint{\~a}o Pereira",
  title =        "Function Call Re-Vectorization",
  journal =      j-SIGPLAN,
  volume =       "52",
  number =       "8",
  pages =        "313--326",
  month =        aug,
  year =         "2017",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/3155284.3018751",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Fri Dec 1 18:56:12 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "Programming languages such as C for CUDA, OpenCL or ISPC have contributed to increase the programmability of SIMD accelerators and graphics processing units. However, these languages still lack the flexibility offered by low-level SIMD programming on explicit vectors. To close this expressiveness gap while preserving performance, this paper introduces the notion of Call Re-Vectorization (CREV). CREV allows changing the dimension of vectorization during the execution of a kernel, exposing it as a nested parallel kernel call. CREV affords programmability close to dynamic parallelism, a feature that allows the invocation of kernels from inside kernels, but at much lower cost. In this paper, we present a formal semantics of CREV, and an implementation of it on the ISPC compiler. We have used CREV to implement some classic algorithms, including string matching, depth first search and Bellman-Ford, with minimum effort. These algorithms, once compiled by ISPC to Intel-based vector instructions, are as fast as state-of-the-art implementations, yet much simpler. Thus, CREV gives developers the elegance of dynamic programming, and the performance of explicit SIMD programming.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "PPoPP '17 conference proceedings.",
}

@Article{Mossaiby:2017:OIH,
  author =       "F. Mossaiby and A. Shojaei and M. Zaccariotto and U.
Galvanetto", title = "{OpenCL} implementation of a high performance {$3$D} Peridynamic model on graphics accelerators", journal = j-COMPUT-MATH-APPL, volume = "74", number = "8", pages = "1856--1870", day = "15", month = oct, year = "2017", CODEN = "CMAPDK", ISSN = "0898-1221 (print), 1873-7668 (electronic)", ISSN-L = "0898-1221", bibdate = "Sat Jan 13 11:04:24 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/computmathappl2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0898122117304030", acknowledgement = ack-nhfb, fjournal = "Computers and Mathematics with Applications", journal-URL = "http://www.sciencedirect.com/science/journal/08981221", } @Article{Neugebauer:2017:PAR, author = "Olaf Neugebauer and Michael Engel and Peter Marwedel", title = "A parallelization approach for resource-restricted embedded heterogeneous {MPSoCs} inspired by {OpenMP}", journal = j-J-SYST-SOFTW, volume = "125", number = "??", pages = "439--448", month = mar, year = "2017", CODEN = "JSSODM", ISSN = "0164-1212 (print), 1873-1228 (electronic)", ISSN-L = "0164-1212", bibdate = "Sat Feb 4 12:20:39 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/jsystsoftw.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0164121216301534", acknowledgement = ack-nhfb, fjournal = "Journal of Systems and Software", journal-URL = "http://www.sciencedirect.com/science/journal/01641212/", } @Article{Nguyen:2017:ATM, author = "Tan Nguyen and Pietro Cicotti and Eric Bylaska and Dan Quinlan and Scott Baden", title = "Automatic translation of {MPI} source into a latency-tolerant, data-driven form", journal = j-J-PAR-DIST-COMP, volume = "106", number = "??", pages = "1--13", month = aug, year = "2017", CODEN = "JPDCER", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Sat Aug 19 13:10:31 MDT 2017", bibsource =
"https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0743731517300771", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Article{Omar:2017:PSF, author = "Cyrus Omar and Jonathan Aldrich", title = "Programmable semantic fragments: the design and implementation of {\tt typy}", journal = j-SIGPLAN, volume = "52", number = "3", pages = "81--92", month = mar, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093335.2993245", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper introduces typy, a statically typed programming language embedded by reflection into Python. typy features a fragmentary semantics, i.e. it delegates semantic control over each term, drawn from Python's fixed concrete and abstract syntax, to some contextually relevant user-defined semantic fragment. The delegated fragment programmatically (1) typechecks the term (following a bidirectional protocol); and (2) assigns dynamic meaning to the term by computing a translation to Python. We argue that this design is expressive with examples of fragments that express the static and dynamic semantics of (1) functional records; (2) labeled sums (with nested pattern matching a la ML); (3) a variation on JavaScript's prototypal object system; and (4) typed foreign interfaces to Python and OpenCL. These semantic structures are, or would need to be, defined primitively in conventionally structured languages. We further argue that this design is compositionally well-behaved. 
It avoids the expression problem and the problems of grammar composition because the syntax is fixed. Moreover, programs are semantically stable under fragment composition (i.e. defining a new fragment will not change the meaning of existing program components.)", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "GPCE '16 conference proceedings.", } @Article{Pereira:2017:SBC, author = "Phillipe Pereira and Higo Albuquerque and Isabela da Silva and Hendrio Marques and Felipe Monteiro and Ricardo Ferreira and Lucas Cordeiro", title = "{SMT}-based context-bounded model checking for {CUDA} programs", journal = j-CCPE, volume = "29", number = "22", pages = "??--??", day = "25", month = nov, year = "2017", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.3934", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Sat Dec 30 09:11:59 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", } @Article{Qawasmeh:2017:PPR, author = "Ahmad Qawasmeh and Maxime R. Hugues and Henri Calandra and Barbara M. 
Chapman", title = "Performance portability in reverse time migration and seismic modelling via {OpenACC}", journal = j-IJHPCA, volume = "31", number = "5", pages = "422--440", month = sep, year = "2017", CODEN = "IHPCFL", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Sat Jan 6 10:31:59 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/ijsa.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "International Journal of High Performance Computing Applications", journal-URL = "http://hpc.sagepub.com/content/by/year", } @Article{Rathgeber:2017:FAF, author = "Florian Rathgeber and David A. Ham and Lawrence Mitchell and Michael Lange and Fabio Luporini and Andrew T. T. McRae and Gheorghe-Teodor Bercea and Graham R. Markall and Paul H. J. Kelly", title = "{Firedrake}: Automating the Finite Element Method by Composing Abstractions", journal = j-TOMS, volume = "43", number = "3", pages = "24:1--24:??", month = jan, year = "2017", CODEN = "ACMSCU", DOI = "https://doi.org/10.1145/2998441", ISSN = "0098-3500 (print), 1557-7295 (electronic)", ISSN-L = "0098-3500", bibdate = "Wed Oct 4 10:55:07 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/python.bib; https://www.math.utah.edu/pub/tex/bib/toms.bib", URL = "https://dl.acm.org/citation.cfm?id=2998441", abstract = "Firedrake is a new tool for automating the numerical solution of partial differential equations. Firedrake adopts the domain-specific language for the finite element method of the FEniCS project, but with a pure Python runtime-only implementation centered on the composition of several existing and new abstractions for particular aspects of scientific computing. The result is a more complete separation of concerns that eases the incorporation of separate contributions from computer scientists, numerical analysts, and application specialists.
These contributions may add functionality or improve performance. Firedrake benefits from automatically applying new optimizations. This includes factorizing mixed function spaces, transforming and vectorizing inner loops, and intrinsically supporting block matrix operations. Importantly, Firedrake presents a simple public API for escaping the UFL abstraction. This allows users to implement common operations that fall outside of pure variational formulations, such as flux limiters.", acknowledgement = ack-nhfb, articleno = "24", fjournal = "ACM Transactions on Mathematical Software (TOMS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J782", } @Article{Rejitha:2017:EPC, author = "R. S. Rejitha and Shajulin Benedict and Suja A. Alex and Shany Infanto", title = "Energy prediction of {CUDA} application instances using dynamic regression models", journal = j-COMPUTING, volume = "99", number = "8", pages = "765--790", month = aug, year = "2017", CODEN = "CMPTA2", DOI = "https://doi.org/10.1007/s00607-016-0534-5", ISSN = "0010-485X (print), 1436-5057 (electronic)", ISSN-L = "0010-485X", bibdate = "Fri Feb 9 14:54:09 MST 2018", bibsource = "http://link.springer.com/journal/607/99/8; https://www.math.utah.edu/pub/tex/bib/computing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Computing", journal-URL = "http://link.springer.com/journal/607", } @Article{Rizzardi:2017:ATS, author = "Mariarosaria Rizzardi", title = "{Algorithm 981}: {Talbot Suite DE}: Application of Modified {Talbot}'s Method to Solve Differential Problems", journal = j-TOMS, volume = "44", number = "2", pages = "18:1--18:23", month = sep, year = "2017", CODEN = "ACMSCU", DOI = "https://doi.org/10.1145/3089248", ISSN = "0098-3500 (print), 1557-7295 (electronic)", ISSN-L = "0098-3500", bibdate = "Tue Sep 19 17:19:59 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/matlab.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; 
https://www.math.utah.edu/pub/tex/bib/toms.bib", URL = "http://dl.acm.org/citation.cfm?id=3089248", abstract = "In order to solve a differential problem, the Laplace Transform method, when applicable, replaces the problem with a simpler one; the solution is obtained by solving the new problem and then by computing the inverse Laplace Transform of this function. In a numerical context, since the solution of the transformed problem consists of a sequence of Laplace Transform samples, most of the software for the numerical inversion cannot be used since the transform, among parameters, must be passed as a function. To fill this gap, we present Talbot Suite DE, a C software collection for Laplace Transform inversions, specifically designed for these problems and based on Talbot's method. It contains both sequential and parallel implementations; the latter is accomplished by means of OpenMP. We also report some performance results. Aimed at non-expert users, the software is equipped with several examples and a User Guide that includes the external documentation, explains how to use all the sample code, and reports its results about accuracy and efficiency. Some examples are entirely in C and others combine different programming languages (C/MATLAB, C/FORTRAN). The User Guide also contains useful hints to avoid possible errors issued during the compilation or execution of mixed-language code.", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Transactions on Mathematical Software (TOMS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J782", } @Article{Russo:2017:MPG, author = "Igor L. S. Russo and Heder S. Bernardino and Helio J. C.
Barbosa", title = "A massively parallel Grammatical Evolution technique with {OpenCL}", journal = j-J-PAR-DIST-COMP, volume = "109", number = "??", pages = "333--349", month = nov, year = "2017", CODEN = "JPDCER", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Sat Aug 19 13:10:32 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S074373151730206X", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Article{Sato:2017:NIT, author = "Kento Sato and Dong H. Ahn and Ignacio Laguna and Gregory L. Lee and Martin Schulz and Christopher M. Chambreau", title = "Noise Injection Techniques to Expose Subtle and Unintended Message Races", journal = j-SIGPLAN, volume = "52", number = "8", pages = "89--101", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3018767", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Debugging intermittently occurring bugs within MPI applications is challenging, and message races, a condition in which two or more sends race to match with a receive, are one of the common root causes. Many debugging tools have been proposed to help programmers resolve them, but their runtime interference perturbs the timing such that subtle races often cannot be reproduced with debugging tools. We present novel noise injection techniques to expose message races even under a tool's control. 
We first formalize this race problem in the context of non-deterministic parallel applications and use this analysis to determine an effective noise-injection strategy to uncover them. We codified these techniques in NINJA (Noise INJection Agent) that exposes these races without modification to the application. Our evaluations on synthetic cases as well as a real-world bug in Hypre-2.10.1 show that NINJA significantly helps expose races.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "PPoPP '17 conference proceedings.", } @Article{Schardl:2017:TEF, author = "Tao B. Schardl and William S. Moses and Charles E. Leiserson", title = "{Tapir}: Embedding Fork-Join Parallelism into {LLVM}'s Intermediate Representation", journal = j-SIGPLAN, volume = "52", number = "8", pages = "249--265", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3018758", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This paper explores how fork-join parallelism, as supported by concurrency platforms such as Cilk and OpenMP, can be embedded into a compiler's intermediate representation (IR). Mainstream compilers typically treat parallel linguistic constructs as syntactic sugar for function calls into a parallel runtime. These calls prevent the compiler from performing optimizations across parallel control constructs. Remedying this situation is generally thought to require an extensive reworking of compiler analyses and code transformations to handle parallel semantics. Tapir is a compiler IR that represents logically parallel tasks asymmetrically in the program's control flow graph. 
Tapir allows the compiler to optimize across parallel control constructs with only minor changes to its existing analyses and code transformations. To prototype Tapir in the LLVM compiler, for example, we added or modified about 6000 lines of LLVM's 4-million-line codebase. Tapir enables LLVM's existing compiler optimizations for serial code --- including loop-invariant-code motion, common-subexpression elimination, and tail-recursion elimination --- to work with parallel control constructs such as spawning and parallel loops. Tapir also supports parallel optimizations such as loop scheduling.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "PPoPP '17 conference proceedings.", } @Article{Schmitt:2017:SCP, author = "Felix Schmitt and Robert Dietrich and Guido Juckeland", title = "Scalable critical-path analysis and optimization guidance for hybrid {MPI--CUDA} applications", journal = j-IJHPCA, volume = "31", number = "6", pages = "485--498", month = nov, year = "2017", CODEN = "IHPCFL", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Sat Jan 6 10:31:59 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/ijsa.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "International Journal of High Performance Computing Applications", journal-URL = "http://hpc.sagepub.com/content/by/year", } @Article{Sharma:2017:PDR, author = "Prateek Sharma and David Irwin and Prashant Shenoy", title = "Portfolio-driven Resource Management for Transient Cloud Servers", journal = j-POMACS, volume = "1", number = "1", pages = "5:1--5:??", month = jun, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3084442", ISSN = "2476-1249", ISSN-L = "2476-1249", bibdate = "Fri Jun 16 09:11:52 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pomacs.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = 
"http://dl.acm.org/citation.cfm?id=3084442", abstract = "Cloud providers have begun to offer their surplus capacity in the form of low-cost transient servers, which can be revoked unilaterally at any time. While the low cost of transient servers makes them attractive for a wide range of applications, such as data processing and scientific computing, failures due to server revocation can severely degrade application performance. Since different transient server types offer different cost and availability tradeoffs, we present the notion of server portfolios that is based on financial portfolio modeling. Server portfolios enable construction of an 'optimal' mix of servers to meet an application's sensitivity to cost and revocation risk. We implement model-driven portfolios in a system called ExoSphere, and show how diverse applications can use portfolios and application-specific policies to gracefully handle transient servers. We show that ExoSphere enables widely-used parallel applications such as Spark, MPI, and BOINC to be made transiency-aware with modest effort.
Our experiments show that allowing the applications to use suitable transiency-aware policies, ExoSphere is able to achieve 80\% cost savings when compared to on-demand servers and greatly reduces revocation risk compared to existing approaches.", acknowledgement = ack-nhfb, articleno = "5", fjournal = "Proceedings of the ACM on Measurement and Analysis of Computing Systems (POMACS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J1567", } @Article{Silla:2017:BRG, author = "Federico Silla and Sergio Iserte and Carlos Rea{\~n}o and Javier Prades", title = "On the benefits of the remote {GPU} virtualization mechanism: The {rCUDA} case", journal = j-CCPE, volume = "29", number = "13", pages = "??--??", day = "10", month = jul, year = "2017", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.4072", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Mon Jul 24 08:22:38 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", } @Article{Singh:2017:EER, author = "Amit Kumar Singh and Alok Prakash and Karunakar Reddy Basireddy and Geoff V. Merrett and Bashir M.
Al-Hashimi", title = "Energy-Efficient Run-Time Mapping and Thread Partitioning of Concurrent {OpenCL} Applications on {CPU--GPU MPSoCs}", journal = j-TECS, volume = "16", number = "5s", pages = "147:1--147:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126548", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Heterogeneous Multi-Processor Systems-on-Chips (MPSoCs) containing CPU and GPU cores are typically required to execute applications concurrently. However, as will be shown in this paper, existing approaches are not well suited for concurrent applications as they are developed either by considering only a single application or they do not exploit both CPU and GPU cores at the same time. In this paper, we propose an energy-efficient run-time mapping and thread partitioning approach for executing concurrent OpenCL applications on both CPU and GPU cores while satisfying performance requirements. Depending upon the performance requirements, for each concurrently executing application, the mapping process finds the appropriate number of CPU cores and operating frequencies of CPU and GPU cores, and the partitioning process identifies an efficient partitioning of the applications' threads between CPU and GPU cores. We validate the proposed approach experimentally on the Odroid-XU3 hardware platform with various mixes of applications from the Polybench benchmark suite. Additionally, a case-study is performed with a real-world application SLAMBench.
Results show an average energy saving of 32\% compared to existing approaches while still satisfying the performance requirements.", acknowledgement = ack-nhfb, articleno = "147", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J840", } @Article{Sotomayor:2017:ACG, author = "Rafael Sotomayor and Luis Miguel Sanchez and Javier Garcia Blas and Javier Fernandez and J. Daniel Garcia", title = "Automatic {CPU\slash GPU} Generation of Multi-versioned {OpenCL} Kernels for {C++} Scientific Applications", journal = j-INT-J-PARALLEL-PROG, volume = "45", number = "2", pages = "262--282", month = apr, year = "2017", CODEN = "IJPPE5", DOI = "https://doi.org/10.1007/s10766-016-0425-6", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Mon Mar 13 15:25:22 MDT 2017", bibsource = "http://link.springer.com/journal/10766/45/2; https://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/article/10.1007/s10766-016-0425-6", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", } @Article{Steele:2017:UBP, author = "Guy L. 
{Steele, Jr.} and Jean-Baptiste Tristan", title = "Using Butterfly-Patterned Partial Sums to Draw from Discrete Distributions", journal = j-SIGPLAN, volume = "52", number = "8", pages = "341--355", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3018757", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We describe a SIMD technique for drawing values from multiple discrete distributions, such as sampling from the random variables of a mixture model, that avoids computing a complete table of partial sums of the relative probabilities. A table of alternate (``butterfly-patterned'') form is faster to compute, making better use of coalesced memory accesses; from this table, complete partial sums are computed on the fly during a binary search. Measurements using CUDA 7.5 on an NVIDIA Titan Black GPU show that this technique makes an entire machine-learning application that uses a Latent Dirichlet Allocation topic model with 1024 topics about 13\% faster (when using single-precision floating-point data) or about 35\% faster (when using double-precision floating-point data) than doing a straightforward matrix transposition after using coalesced accesses.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "PPoPP '17 conference proceedings.", } @Article{Szo:2017:PET, author = "M{\'a}t{\'e} Sz{\H{o}}ke and Tam{\'a}s Istv{\'a}n J{\'o}zsa and {\'A}d{\'a}m Kolesz{\'a}r and Irene Moulitsas and L{\'a}szl{\'o} K{\"o}n{\"o}zsy", title = "Performance Evaluation of a Two-Dimensional Lattice {Boltzmann} Solver Using {CUDA} and {PGAS} {UPC} Based Parallelisation", journal = j-TOMS, volume = "44", number = "1", pages = "8:1--8:22", month = jul, year =
"2017", CODEN = "ACMSCU", DOI = "https://doi.org/10.1145/3085590", ISSN = "0098-3500 (print), 1557-7295 (electronic)", ISSN-L = "0098-3500", bibdate = "Wed Oct 4 10:55:07 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/toms.bib", URL = "https://dl.acm.org/citation.cfm?id=3085590", abstract = "The Unified Parallel C (UPC) language from the Partitioned Global Address Space (PGAS) family unifies the advantages of shared and local memory spaces and offers a relatively straightforward code parallelisation with the Central Processing Unit (CPU). In contrast, the Compute Unified Device Architecture (CUDA) development kit gives a tool to make use of the Graphics Processing Unit (GPU). We provide a detailed comparison between these novel techniques through the parallelisation of a two-dimensional lattice Boltzmann method based fluid flow solver. Our comparison between the CUDA and UPC parallelisation takes into account the required conceptual effort, the performance gain, and the limitations of the approaches from the application oriented developers' point of view. We demonstrated that UPC led to competitive efficiency with the local memory implementation. However, the performance of the shared memory code fell behind our expectations, and we concluded that the investigated UPC compilers could not efficiently treat the shared memory space.
The CUDA implementation proved to be more complex compared to the UPC approach mainly because of the complicated memory structure of the graphics card which also makes GPUs suitable for the parallelisation of the lattice Boltzmann method.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on Mathematical Software (TOMS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J782", } @Article{Szoke:2017:PET, author = "M{\'a}t{\'e} Sz{\H{o}}ke and Tam{\'a}s Istv{\'a}n J{\'o}zsa and {\'A}d{\'a}m Kolesz{\'a}r and Irene Moulitsas and L{\'a}szl{\'o} K{\"o}n{\"o}zsy", title = "Performance Evaluation of a Two-Dimensional Lattice {Boltzmann} Solver Using {CUDA} and {PGAS UPC} Based Parallelisation", journal = j-TOMS, volume = "44", number = "1", pages = "8:1--8:22", month = jul, year = "2017", CODEN = "ACMSCU", DOI = "https://doi.org/10.1145/3085590", ISSN = "0098-3500 (print), 1557-7295 (electronic)", ISSN-L = "0098-3500", bibdate = "Fri Jul 14 16:39:28 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/toms.bib", abstract = "The Unified Parallel C (UPC) language from the Partitioned Global Address Space (PGAS) family unifies the advantages of shared and local memory spaces and offers a relatively straightforward code parallelisation with the Central Processing Unit (CPU). In contrast, the Compute Unified Device Architecture (CUDA) development kit gives a tool to make use of the Graphics Processing Unit (GPU). We provide a detailed comparison between these novel techniques through the parallelisation of a two-dimensional lattice Boltzmann method based fluid flow solver. Our comparison between the CUDA and UPC parallelisation takes into account the required conceptual effort, the performance gain, and the limitations of the approaches from the application oriented developers' point of view. We demonstrated that UPC led to competitive efficiency with the local memory implementation.
However, the performance of the shared memory code fell behind our expectations, and we concluded that the investigated UPC compilers could not efficiently treat the shared memory space. The CUDA implementation proved to be more complex compared to the UPC approach mainly because of the complicated memory structure of the graphics card which also makes GPUs suitable for the parallelisation of the lattice Boltzmann method.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on Mathematical Software (TOMS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J782", } @Article{Takafuji:2017:CCC, author = "Daisuke Takafuji and Koji Nakano and Yasuaki Ito and Jacir Bordim", title = "{C2CU}: a {CUDA--C} program generator for bulk execution of a sequential algorithm", journal = j-CCPE, volume = "29", number = "17", pages = "??--??", day = "10", month = sep, year = "2017", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.4022", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Mon Sep 4 17:02:00 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", } @Article{Taylor:2017:AOO, author = "Ben Taylor and Vicent Sanz Marco and Zheng Wang", title = "Adaptive optimization for {OpenCL} programs on embedded heterogeneous systems", journal = j-SIGPLAN, volume = "52", number = "4", pages = "11--20", month = may, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140582.3081040", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:15 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Heterogeneous multi-core architectures consisting of CPUs
and GPUs are commonplace in today's embedded systems. These architectures offer potential for energy efficient computing if the application task is mapped to the right core. Realizing such potential is challenging due to the complex and evolving nature of hardware and applications. This paper presents an automatic approach to map OpenCL kernels onto heterogeneous multi-cores for a given optimization criterion --- whether it is faster runtime, lower energy consumption or a trade-off between them. This is achieved by developing a machine learning based approach to predict which processor to use to run the OpenCL kernel and the host program, and at what frequency the processor should operate. Instead of hand-tuning a model for each optimization metric, we use machine learning to develop a unified framework that first automatically learns the optimization heuristic for each metric off-line, then uses the learned knowledge to schedule OpenCL kernels at runtime based on code and runtime information of the program. We apply our approach to a set of representative OpenCL benchmarks and evaluate it on an ARM big.LITTLE mobile platform. 
Our approach achieves over 93\% of the performance delivered by a perfect predictor. We obtain, on average, 1.2x, 1.6x, and 1.8x improvement respectively for runtime, energy consumption and the energy delay product when compared to a comparative heterogeneous-aware OpenCL task mapping scheme.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "LCTES '17 conference proceedings.", } @Article{Utterback:2017:POR, author = "Robert Utterback and Kunal Agrawal and I-Ting Angelina Lee and Milind Kulkarni", title = "Processor-Oblivious Record and Replay", journal = j-SIGPLAN, volume = "52", number = "8", pages = "145--161", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3018764", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Record-and-replay systems are useful tools for debugging non-deterministic parallel programs by first recording an execution and then replaying that execution to produce the same access pattern. Existing record-and-replay systems generally target thread-based execution models, and record the behaviors and interleavings of individual threads. Dynamic multithreaded languages and libraries, such as the Cilk family, OpenMP, TBB, etc., do not have a notion of threads. Instead, these languages provide a processor-oblivious model of programming, where programs expose task-parallelism using high-level constructs such as spawn/sync without regard to the number of threads/cores available to run the program.
Thread-based record-and-replay would violate the processor-oblivious nature of these programs, as they incorporate the number of threads into the recorded information, constraining the replayed execution to the same number of threads. In this paper, we present a processor-oblivious record-and-replay scheme for such languages where record and replay can use different number of processors and both are scheduled using work stealing. We provide theoretical guarantees for our record and replay scheme --- namely that record is optimal for programs with one lock and replay is near-optimal for all cases. In addition, we implemented this scheme in the Cilk Plus runtime system and our evaluation indicates that processor-obliviousness does not cause substantial overheads.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "PPoPP '17 conference proceedings.", } @Book{vanderPas:2017:UON, author = "Ruud van der Pas", title = "Using {OpenMP} --- the next step: affinity, accelerators, tasking, and {SIMD}", publisher = pub-MIT, address = pub-MIT:adr, pages = "xxi + 365", year = "2017", ISBN = "0-262-53478-9 (paperback)", ISBN-13 = "978-0-262-53478-9 (paperback)", LCCN = "QA76.642 .P427 2017", bibdate = "Sat Oct 5 07:54:47 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; z3950.loc.gov:7090/Voyager", series = "Scientific and engineering computation", abstract = "This book offers an up-to-date, practical tutorial on advanced features in the widely used OpenMP parallel programming model. Building on the previous volume, Using OpenMP: Portable Shared Memory Parallel Programming (MIT Press), this book goes beyond the fundamentals to focus on what has been changed and added to OpenMP since the 2.5 specifications. 
It emphasizes four major and advanced areas: thread affinity (keeping threads close to their data), accelerators (special hardware to speed up certain operations), tasking (to parallelize algorithms with a less regular execution flow), and SIMD (hardware assisted operations on vectors). As in the earlier volume, the focus is on practical usage, with major new features primarily introduced by example. Examples are restricted to C and C++, but are straightforward enough to be understood by Fortran programmers. After a brief recap of OpenMP 2.5, the book reviews enhancements introduced since 2.5. It then discusses in detail tasking, a major functionality enhancement; Non-Uniform Memory Access (NUMA) architectures, supported by OpenMP; SIMD, or Single Instruction Multiple Data; heterogeneous systems, a new parallel programming model to offload computation to accelerators; and the expected further development of OpenMP.", acknowledgement = ack-nhfb, subject = "Parallel programming (Computer science); Application program interfaces (Computer software); OpenMP (Application program interface)", tableofcontents = "Intro \\ Contents \\ Series Foreword \\ Foreword \\ Preface \\ 1 A Recap of OpenMP 2.5 \\ 1.1 OpenMP Directives and Syntax \\ 1.2 Creating a Parallel Program with OpenMP \\ 1.2.1 The Parallel Region \\ 1.2.2 The OpenMP Execution Model \\ 1.2.3 The OpenMP Memory Model \\ 1.3 The Worksharing Constructs \\ 1.3.1 The Loop Construct \\ 1.3.2 The Sections Construct \\ 1.3.3 The Single Construct \\ 1.3.4 The Fortran Workshare Construct \\ 1.3.5 The Combined Worksharing Constructs \\ 1.4 The Master Construct \\ 1.5 Nested Parallelism \\ 1.6 Synchronization Constructs \\ 1.6.1 The Barrier Construct \\ 1.6.2 The Critical Construct \\ 1.6.3 The Atomic Construct \\ 1.6.4 The Ordered Construct \\ 1.7 The OpenMP 2.5 Environment Variables \\ 1.8 The OpenMP 2.5 Runtime Functions \\ 1.9 Internal Control Variables in OpenMP \\ 1.10 Concluding Remarks \\ 2 New Features in OpenMP \\ 
2.1 Enhancements to Existing Constructs \\ 2.1.1 The Schedule Clause \\ 2.1.2 The If Clause \\ 2.1.3 The Collapse Clause \\ 2.1.4 The Linear Clause \\ 2.1.5 The Critical Construct \\ 2.1.6 The Atomic Construct \\ 2.2 New Environment Variables \\ 2.3 New Runtime Functions \\ 2.3.1 Runtime Functions for Thread Management, Thread Scheduling, and Nested Parallelism \\ 2.3.2 Runtime Functions for Tasking, Cancellation, and Thread Affinity \\ 2.3.3 Runtime Functions for Locking \\ 2.3.4 Runtime Functions for Heterogeneous Systems \\ 2.3.5 Usage Examples of the New Runtime Functions \\ 2.4 New Functionality \\ 2.4.1 Changed Ownership of Locks \\ 2.4.2 Cancellation \\ 2.4.3 User-Defined Reduction \\ 2.4.4 The Doacross Loop \\ 2.5 Concluding Remarks \\ 3 Tasking \\ 3.1 Hello Task \\ 3.1.1 Parallelizing a Palindrome \\ 3.1.2 Parallelizing a Sentence with a Palindrome \\ 3.1.3 Closing Comments on the Palindrome Example \\ 3.2 Using Tasks to Parallelize a Linked List \\ 3.2.1 The Sequential Version of the Linked List Program \\ 3.2.2 The Parallel Version of the Linked List Program \\ 3.2.3 Closing Comments on the Linked List Example \\ 3.3 Sorting Things Out with Tasks \\ 3.3.1 The Sequential Quicksort Algorithm \\ 3.3.2 The OpenMP Quicksort Algorithm \\ 3.3.3 Fine-Tuning the OpenMP Quicksort Algorithm \\ 3.3.4 Closing Comments on the OpenMP Quicksort Algorithm \\ 3.4 Overlapping I/O and Computations Using Tasks \\ 3.4.1 Using Tasks and Task Dependences \\ 3.4.2 Using the Taskloop Construct \\ 3.4.3 Closing Comments on the Pipeline Example \\ 3.5 The Data Environment with Tasks \\ 3.6 What is a Task? 
\\ 3.7 Task Creation, Synchronization, and Scheduling \\ 3.8 The Taskloop Construct \\ 3.9 Concluding Remarks \\ 4 Thread Affinity \\ 4.1 The Characteristics of a cc-NUMA Architecture \\ 4.2 First Touch Data Placement \\ 4.2.1 The Pros and Cons of First Touch Data Placement \\ 4.2.2 How to Exploit the First Touch Policy \\ 4.3 The Need for Thread Affinity Support \\ 4.4 The OpenMP Thread Affinity Philosophy \\ 4.5 The OpenMP Places Concept \\ 4.5.1 Defining OpenMP Places Using Sets with Numbers \\ 4.5.2 The OpenMP Place List \\ 4.5.3 Defining OpenMP Places Using Abstract Names", } @Article{Vargas-Perez:2017:HMO, author = "Sandino Vargas-Perez and Fahad Saeed", title = "A Hybrid {MPI--OpenMP} Strategy to Speedup the Compression of Big Next-Generation Sequencing Datasets", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "28", number = "10", pages = "2760--2769", month = oct, year = "2017", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/TPDS.2017.2692782", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Thu Oct 12 06:58:12 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://www.computer.org/csdl/trans/td/2017/10/07895161-abs.html", acknowledgement = ack-nhfb, journal-URL = "http://www.computer.org/tpds/archives.htm", } @Article{Waidyasooriya:2017:OBF, author = "Hasitha Muthumala Waidyasooriya and Yasuhiro Takei and Shunsuke Tatsumi and Masanori Hariyama", title = "{OpenCL}-Based {FPGA}-Platform for Stencil Computation and Its Optimization Methodology", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "28", number = "5", pages = "1390--1402", month = may, year = "2017", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/TPDS.2016.2614981", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Thu Jun 15 05:46:51 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib; 
https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://www.computer.org/csdl/trans/td/2017/05/07582502-abs.html", acknowledgement = ack-nhfb, journal-URL = "http://www.computer.org/tpds/archives.htm", } @Article{Wang:2017:CEG, author = "Siqi Wang and Guanwen Zhong and Tulika Mitra", title = "{CGPredict}: Embedded {GPU} Performance Estimation from Single-Threaded Applications", journal = j-TECS, volume = "16", number = "5s", pages = "146:1--146:??", month = oct, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3126546", ISSN = "1539-9087 (print), 1558-3465 (electronic)", ISSN-L = "1539-9087", bibdate = "Thu Oct 17 18:16:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/tecs.bib", abstract = "Heterogeneous multiprocessor system-on-chip architectures are endowed with accelerators such as embedded GPUs and FPGAs capable of general-purpose computation. The application developers for such platforms need to carefully choose the accelerator with the maximum performance benefit. For a given application, usually, the reference code is specified in a high-level single-threaded programming language such as C. The performance of an application kernel on an accelerator is a complex interplay among the exposed parallelism, the compiler, and the accelerator architecture. Thus, determining the performance of a kernel requires its redevelopment into each accelerator-specific language, causing substantial wastage of time and effort. To aid the developer in this early design decision, we present an analytical framework CGPredict to predict the performance of a computational kernel on an embedded GPU architecture from un-optimized, single-threaded C code. The analytical approach provides insights on application characteristics which suggest further application-specific optimizations. 
The estimation error is as low as 2.66\% (average 9\%) compared to the performance of the same kernel written in native CUDA code running on NVIDIA Kepler embedded GPU. This low performance estimation error enables CGPredict to provide an early design recommendation of the accelerator starting from C code.", acknowledgement = ack-nhfb, articleno = "146", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J840", } @Article{Weber:2017:MAL, author = "Nicolas Weber and Michael Goesele", title = "{MATOG}: Array Layout Auto-Tuning for {CUDA}", journal = j-TACO, volume = "14", number = "3", pages = "28:1--28:??", month = sep, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3106341", ISSN = "1544-3566 (print), 1544-3973 (electronic)", ISSN-L = "1544-3566", bibdate = "Wed Sep 6 17:12:05 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/taco.bib", abstract = "Optimal code performance is (besides correctness and accuracy) the most important objective in compute intensive applications. In many of these applications, Graphic Processing Units (GPUs) are used because of their high amount of compute power. However, caused by their massively parallel architecture, the code has to be specifically adjusted to the underlying hardware to achieve optimal performance and therefore has to be reoptimized for each new generation. In reality, this is usually not the case as productive code is normally at least several years old and nobody has the time to continuously adjust existing code to new hardware. In recent years more and more approaches have emerged that automatically tune the performance of applications toward the underlying hardware. In this article, we present the MATOG auto-tuner and its concepts. It abstracts the array memory access in CUDA applications and automatically optimizes the code according to the used GPUs. 
MATOG only requires few profiling runs to analyze even complex applications, while achieving significant speedups over non-optimized code, independent of the used GPU generation and without the need to manually tune the code.", acknowledgement = ack-nhfb, articleno = "28", fjournal = "ACM Transactions on Architecture and Code Optimization (TACO)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J924", } @Article{Wickerson:2017:ACM, author = "John Wickerson and Mark Batty and Tyler Sorensen and George A. Constantinides", title = "Automatically comparing memory consistency models", journal = j-SIGPLAN, volume = "52", number = "1", pages = "190--204", month = jan, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3093333.3009838", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:14 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "A memory consistency model (MCM) is the part of a programming language or computer architecture specification that defines which values can legally be read from shared memory locations. Because MCMs take into account various optimisations employed by architectures and compilers, they are often complex and counterintuitive, which makes them challenging to design and to understand. We identify four tasks involved in designing and understanding MCMs: generating conformance tests, distinguishing two MCMs, checking compiler optimisations, and checking compiler mappings. We show that all four tasks are instances of a general constraint-satisfaction problem to which the solution is either a program or a pair of programs. Although this problem is intractable for automatic solvers when phrased over programs directly, we show how to solve analogous constraints over program executions, and then construct programs that satisfy the original constraints. 
Our technique, which is implemented in the Alloy modelling framework, is illustrated on several software- and architecture-level MCMs, both axiomatically and operationally defined. We automatically recreate several known results, often in a simpler form, including: distinctions between variants of the C11 MCM; a failure of the `SC-DRF guarantee' in an early C11 draft; that x86 is `multi-copy atomic' and Power is not; bugs in common C11 compiler optimisations; and bugs in a compiler mapping from OpenCL to AMD-style GPUs. We also use our technique to develop and validate a new MCM for NVIDIA GPUs that supports a natural mapping from OpenCL.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "POPL '17 conference proceedings.", } @Article{Winkler:2017:GSM, author = "Daniel Winkler and Michael Meister and Massoud Rezavand and Wolfgang Rauch", title = "{gpuSPHASE} --- A shared memory caching implementation for {$2$D} {SPH} using {CUDA}", journal = j-COMP-PHYS-COMM, volume = "213", number = "??", pages = "165--180", month = apr, year = "2017", CODEN = "CPHCBZ", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Sat Feb 4 08:00:23 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465516303666", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655/", } @Article{Yam-Uicab:2017:FHT, author = "R. Yam-Uicab and J. L. Lopez-Martinez and J. A. Trejo-Sanchez and H. Hidalgo-Silva and S. 
Gonzalez-Segura", title = "A fast {Hough} Transform algorithm for straight lines detection in an image using {GPU} parallel computing with {CUDA-C}", journal = j-J-SUPERCOMPUTING, volume = "73", number = "11", pages = "4823--4842", month = nov, year = "2017", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-017-2051-5", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Sat Jan 6 08:59:18 MST 2018", bibsource = "http://link.springer.com/journal/11227/73/11; https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Article{YarKhan:2017:PPN, author = "Asim YarKhan and Jakub Kurzak and Piotr Luszczek and Jack Dongarra", title = "Porting the {PLASMA} Numerical Library to the {OpenMP} Standard", journal = j-INT-J-PARALLEL-PROG, volume = "45", number = "3", pages = "612--633", month = jun, year = "2017", CODEN = "IJPPE5", DOI = "https://doi.org/10.1007/s10766-016-0441-6", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Sat Jun 24 11:37:59 MDT 2017", bibsource = "http://link.springer.com/journal/10766/45/3; https://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", } @Article{Yeh:2017:PFG, author = "Tsung Tai Yeh and Amit Sabne and Putt Sakdhnagool and Rudolf Eigenmann and Timothy G. 
Rogers", title = "{Pagoda}: Fine-Grained {GPU} Resource Virtualization for Narrow Tasks", journal = j-SIGPLAN, volume = "52", number = "8", pages = "221--234", month = aug, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3155284.3018754", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Fri Dec 1 18:56:12 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Massively multithreaded GPUs achieve high throughput by running thousands of threads in parallel. To fully utilize the hardware, workloads spawn work to the GPU in bulk by launching large tasks, where each task is a kernel that contains thousands of threads that occupy the entire GPU. GPUs face severe underutilization and their performance benefits vanish if the tasks are narrow, i.e., they contain {$<$} 500 threads. Latency-sensitive applications in network, signal, and image processing that generate a large number of tasks with relatively small inputs are examples of such limited parallelism. This paper presents Pagoda, a runtime system that virtualizes GPU resources, using an OS-like daemon kernel called MasterKernel. Tasks are spawned from the CPU onto Pagoda as they become available, and are scheduled by the MasterKernel at the warp granularity. Experimental results demonstrate that Pagoda achieves a geometric mean speedup of 5.70x over PThreads running on a 20-core CPU, 1.51x over CUDA-HyperQ, and 1.69x over GeMTC, the state-of-the-art runtime GPU task scheduling system.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "PPoPP '17 conference proceedings.", } @Article{Young-S:2017:OGI, author = "Luis E. Young-S. and Paulsamy Muruganandam and Sadhan K. 
Adhikari and Vladimir Loncar and Dusan Vudragovi{\'c} and Antun Balaz", title = "{OpenMP} {GNU} and {Intel} {Fortran} programs for solving the time-dependent {Gross--Pitaevskii} equation", journal = j-COMP-PHYS-COMM, volume = "220", number = "??", pages = "503--506", month = nov, year = "2017", CODEN = "CPHCBZ", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Fri Sep 15 11:56:42 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/fortran3.bib; https://www.math.utah.edu/pub/tex/bib/gnu.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465517302321", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Zha:2017:IFM, author = "Yue Zha and Jing Li", title = "{IMEC}: A Fully Morphable In-Memory Computing Fabric Enabled by Resistive Crossbar", journal = j-IEEE-COMPUT-ARCHIT-LETT, volume = "16", number = "2", pages = "123--126", month = jul # "\slash " # dec, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1109/LCA.2017.2672558", ISSN = "1556-6056 (print), 1556-6064 (electronic)", ISSN-L = "1556-6056", bibdate = "Thu Jun 20 17:01:23 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeecomputarchitlett.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "In this paper, we propose a fully morphable In-MEmory Computing (IMEC) fabric to better implement the concept of processing inside memory (PIM). Enabled by emerging nonvolatile memory, i.e., RRAM and its monolithic 3D integration, IMEC can be configured into one or a combination of four distinct functions, (1) logic, (2) ternary content addressable memory, (3) memory, and (4) interconnect. 
Thus, IMEC exploits a continuum of PIM capabilities across the whole spectrum, ranging from 0 percent (pure data storage) to 100 percent (pure compute engine), or intermediate states in between. IMEC can be modularly integrated into the DDRx memory subsystem, communicating with processors by the ordinary DRAM commands. Additionally, to reduce the programming burden, we provide a complete framework to compile applications written in high-level programming language (e.g., OpenCL) onto IMEC. This framework also enables code portability across different platforms for heterogeneous computing. By using this framework, several benchmarks are mapped onto IMEC for evaluating its performance, energy and resource utilization. The simulation results show that, IMEC reduces the energy consumption by 99.6 percent, and achieves 644x speedup, compared to a baseline CPU system. We further compare IMEC with FPGA architecture, and demonstrate that the performance improvement is not simply obtained by replacing SRAM cells with denser RRAM cells.", acknowledgement = ack-nhfb, affiliation = "Zha, Y (Reprint Author), Univ Wisconsin, Elect \& Comp Engn Dept, Madison, WI 53706 USA. Zha, Yue; Li, Jing, Univ Wisconsin, Elect \& Comp Engn Dept, Madison, WI 53706 USA.", author-email = "yzha3@wisc.edu jli587@wisc.edu", da = "2019-06-20", doc-delivery-number = "FR2AX", eissn = "1556-6064", fjournal = "IEEE Computer Architecture Letters", journal-iso = "IEEE Comput. Archit. Lett.", journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=10208", keywords = "energy-efficiency computing; Non-volatile memory; processing-in-memory; TCAM", keywords-plus = "ARCHITECTURE", number-of-cited-references = "20", research-areas = "Computer Science", times-cited = "1", unique-id = "Zha:2017:IFM", web-of-science-categories = "Computer Science, Hardware \& Architecture", } @Article{Zhang:2017:DLN, author = "Jie Zhang and Xiaoyi Lu and Dhabaleswar K. 
(DK) Panda", title = "Designing Locality and {NUMA} Aware {MPI} Runtime for Nested Virtualization based {HPC} Cloud with {SR--IOV} Enabled {InfiniBand}", journal = j-SIGPLAN, volume = "52", number = "7", pages = "187--200", month = jul, year = "2017", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3140607.3050765", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Sat Sep 16 10:18:17 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Hypervisor-based virtualization solutions reveal good security and isolation, while container-based solutions make applications and workloads more portable and distributed in an effective, standardized and repeatable way. Therefore, nested virtualization based computing environments (e.g., container over virtual machine), which inherit the capabilities from both solutions, are becoming more and more attractive in clouds (e.g., running Docker over Amazon EC2 VMs). Recent studies have shown that running applications in either VMs or containers still has significant overhead, especially for I/O intensive workloads. This motivates us to investigate whether the nested virtualization based solution can be adopted to build high-performance computing (HPC) clouds for running MPI applications efficiently and where the bottlenecks lie. To eliminate performance bottlenecks, we propose a high-performance two-layer locality and NUMA aware MPI library, which is able to dynamically detect co-resident containers inside one VM as well as detect co-resident VM inside one host at MPI runtime. Thus the MPI processes across different containers and VMs can communicate to each other by shared memory or Cross Memory Attach (CMA) channels instead of network channel if they are co-resident. 
We further propose an enhanced NUMA aware hybrid design to utilize InfiniBand loopback based channel to optimize large message transfer across containers when they are running on different sockets. Performance evaluations show that compared with the performance of the state-of-art (1Layer) design, our proposed enhance-hybrid design can bring up to 184\%, 81\% and 12\% benefit on point-to-point, collective operations, and end applications. Compared with the default performance, our enhanced-hybrid design delivers up to 184\%, 85\% and 16\% performance improvement.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "VEE '17 conference proceedings.", } @Article{Zhu:2017:OAP, author = "Huming Zhu and Yanfei Wu and Pei Li and Peng Zhang and Zhe Ji and Maoguo Gong", title = "An {OpenCL}-accelerated parallel immunodominance clone selection algorithm for feature selection", journal = j-CCPE, volume = "29", number = "9", pages = "??", day = "10", month = may, year = "2017", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.3838", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Mon Jul 24 08:22:36 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", } @Article{Zouaoui:2017:CNG, author = "Chakib Mustapha Anouar Zouaoui and Nasreddine Taleb", title = "{CL\_ARRAY}: a new generic library of multidimensional containers for {C++} compilers with extension for {OpenCL} framework", journal = j-COMP-LANGS-SYS-STRUCT, volume = "50", number = "??", pages = "53--81", month = dec, year = "2017", CODEN = "????", ISSN = "1477-8424 (print), 1873-6866 (electronic)", ISSN-L = "1477-8424", bibdate = "Fri Sep 15 11:36:13 MDT 2017", bibsource = 
"https://www.math.utah.edu/pub/tex/bib/complngs.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S147784241630135X", acknowledgement = ack-nhfb, fjournal = "Computer Languages, Systems and Structures", journal-URL = "http://www.sciencedirect.com/science/journal/14778424/", } @Article{AlKadi:2018:GPC, author = "Muhammed {Al Kadi} and Benedikt Janssen and Jones Yudi and Michael Huebner", title = "General-Purpose Computing with Soft {GPUs} on {FPGAs}", journal = j-TRETS, volume = "11", number = "1", pages = "5:1--5:??", month = mar, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3173548", ISSN = "1936-7406 (print), 1936-7414 (electronic)", ISSN-L = "1936-7406", bibdate = "Sat Oct 19 17:42:59 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/python.bib; https://www.math.utah.edu/pub/tex/bib/trets.bib", abstract = "Using field-programmable gate arrays (FPGAs) as a substrate to deploy soft graphics processing units (GPUs) would enable offering the FPGA compute power in a very flexible GPU-like tool flow. Application-specific adaptations like selective hardening of floating-point operations and instruction set subsetting would mitigate the high area and power demands of soft GPUs. This work explores the capabilities and limitations of soft General Purpose Computing on GPUs (GPGPU) for both fixed- and floating point arithmetic. For this purpose, we have developed FGPU: a configurable, scalable, and portable GPU architecture designed especially for FPGAs. FGPU is open-source and implemented entirely in RTL. It can be programmed in OpenCL and controlled through a Python API. This article introduces its hardware architecture as well as its tool flow. We evaluated the proposed GPGPU approach against multiple other solutions. 
In comparison to homogeneous Multi-Processor System-On-Chips (MPSoCs), we found that using a soft GPU is a Pareto-optimal solution regarding throughput per area and energy consumption. On average, FGPU has a 2.9$ \times $ better compute density and 11.2$ \times $ less energy consumption than a single MicroBlaze processor when computing in IEEE-754 floating-point format. An average speedup of about 4$ \times $ over the ARM Cortex-A9 supported with the NEON vector co-processor has been measured for fixed- or floating-point benchmarks. In addition, the biggest FGPU cores we could implement on a Xilinx Zynq-7000 System-On-Chip (SoC) can deliver similar performance to equivalent implementations with High-Level Synthesis (HLS).", acknowledgement = ack-nhfb, articleno = "5", fjournal = "ACM Transactions on Reconfigurable Technology and Systems (TRETS)", journal-URL = "http://portal.acm.org/toc.cfm?id=J1151", } @Article{Amer:2018:LCM, author = "Abdelhalim Amer and Huiwei Lu and Pavan Balaji and Milind Chabbi and Yanjie Wei and Jeff Hammond and Satoshi Matsuoka", title = "Lock Contention Management in Multithreaded {MPI}", journal = j-TOPC, volume = "5", number = "3", pages = "12:1--12:??", month = jan, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3275443", ISSN = "2329-4949 (print), 2329-4957 (electronic)", ISSN-L = "2329-4949", bibdate = "Wed Jan 23 16:12:26 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/topc.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3275443", abstract = "In this article, we investigate contention management in lock-based thread-safe MPI libraries. Specifically, we make two assumptions: (1) locks are the only form of synchronization when protecting communication paths; and (2) contention occurs, and thus serialization is unavoidable. 
Our work distinguishes between lock acquisitions with respect to work being performed inside a critical section; productive vs. unproductive. Waiting for message reception without doing anything else inside a critical section is an example of unproductive lock acquisition. We show that the high-throughput nature of modern scalable locking protocols translates into better communication progress for throughput-intensive MPI communication but negatively impacts latency-sensitive communication because of overzealous unproductive lock acquisition. To reduce unproductive lock acquisitions, we devised a method that promotes threads with productive work using a generic two-level priority locking protocol. Our results show that using a high-throughput protocol for productive work and a fair protocol for less productive code paths ensures the best tradeoff for fine-grained communication, whereas a fair protocol is sufficient for more coarse-grained communication. Although these efforts have been rewarding, scalability degradation remains significant. 
We discuss techniques that diverge from the pure locking model and offer the potential to further improve scalability.", acknowledgement = ack-nhfb, articleno = "12", fjournal = "ACM Transactions on Parallel Computing", journal-URL = "http://dl.acm.org/citation.cfm?id=2632163", } @Article{Arif:2018:RBP, author = "Mahwish Arif and Hans Vandierendonck", title = "Reducing the burden of parallel loop schedulers for many-core processors", journal = j-SIGPLAN, volume = "53", number = "1", pages = "383--384", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178517", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "This work proposes a low-overhead half-barrier pattern to schedule fine-grain parallel loops and considers its integration in the Intel OpenMP and Cilkplus schedulers. Experimental evaluation demonstrates that the scheduling overhead of our techniques is 43\% lower than Intel OpenMP and 12.1x lower than Cilk. 
We observe 22\% speedup on 48 threads, with a peak of 2.8x speedup.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "PPoPP '18 proceedings.", } @Article{Aydin:2018:RTP, author = "Semra Aydin and Refik Samet and Omer Faruk Bay", title = "Real-time parallel image processing applications on multicore {CPUs} with {OpenMP} and {GPGPU} with {CUDA}", journal = j-J-SUPERCOMPUTING, volume = "74", number = "6", pages = "2255--2275", month = jun, year = "2018", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-017-2168-6", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Thu Oct 10 15:31:12 MDT 2019", bibsource = "http://link.springer.com/journal/11227/74/6; https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Article{Azimi:2018:SVS, author = "Reza Azimi and Tyler Fox and Wendy Gonzalez and Sherief Reda", title = "Scale-Out vs Scale-Up: A Study of {ARM}-based {SoCs} on Server-Class Workloads", journal = j-TOMPECS, volume = "3", number = "4", pages = "18:1--18:??", month = sep, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3232162", ISSN = "2376-3639", bibdate = "Sat Sep 21 07:21:16 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/tompecs.bib", URL = "https://dl.acm.org/citation.cfm?id=3232162", abstract = "ARM 64-bit processing has generated enthusiasm to develop ARM-based servers that are targeted for both data centers and supercomputers. In addition to the server-class components and hardware advancements, the ARM software environment has grown substantially over the past decade. 
Major development ecosystems and libraries have been ported and optimized to run on ARM, making ARM suitable for server-class workloads. There are two trends in available ARM SoCs: mobile-class ARM SoCs that rely on the heterogeneous integration of a mix of CPU cores, GPGPU streaming multiprocessors (SMs), and other accelerators, and the server-class SoCs that instead rely on integrating a larger number of CPU cores with no GPGPU support and a number of IO accelerators. For scaling the number of processing cores, there are two different paradigms: mobile-class SoCs that use scale-out architecture in the form of a cluster of simpler systems connected over a network, and server-class ARM SoCs that use the scale-up solution and leverage symmetric multiprocessing to pack a large number of cores on the chip. In this article, we present ScaleSoC cluster, which is a scale-out solution based on mobile class ARM SoCs. ScaleSoC leverages fast network connectivity and GPGPU acceleration to improve performance and energy efficiency compared to previous ARM scale-out clusters. We consider a wide range of modern server-class parallel workloads to study both scaling paradigms, including latency-sensitive transactional workloads, MPI-based CPU and GPGPU-accelerated scientific applications, and emerging artificial intelligence workloads. We study the performance and energy efficiency of ScaleSoC compared to server-class ARM SoCs and discrete GPGPUs in depth. We quantify the network overhead on the performance of ScaleSoC and show that packing a large number of ARM cores on a single chip does not necessarily guarantee better performance, due to the fact that shared resources, such as last-level cache, become performance bottlenecks. 
We characterize the GPGPU accelerated workloads and demonstrate that for applications that can leverage the better CPU-GPGPU balance of the ScaleSoC cluster, performance and energy efficiency improve compared to discrete GPGPUs.", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Transactions on Modeling and Performance Evaluation of Computing Systems (TOMPECS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J1525", } @Article{Bazow:2018:MPS, author = "Dennis Bazow and Ulrich Heinz and Michael Strickland", title = "Massively parallel simulations of relativistic fluid dynamics on graphics processing units with {CUDA}", journal = j-COMP-PHYS-COMM, volume = "225", number = "??", pages = "92--113", month = apr, year = "2018", CODEN = "CPHCBZ", DOI = "https://doi.org/10.1016/j.cpc.2017.01.015", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Wed Feb 28 14:39:27 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465517300279", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Belviranli:2018:JDA, author = "Mehmet E. Belviranli and Seyong Lee and Jeffrey S. Vetter and Laxmi N. 
Bhuyan", title = "{Juggler}: a dependence-aware task-based execution framework for {GPUs}", journal = j-SIGPLAN, volume = "53", number = "1", pages = "54--67", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178492", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "Scientific applications with single instruction, multiple data (SIMD) computations show considerable performance improvements when run on today's graphics processing units (GPUs). However, the existence of data dependences across thread blocks may significantly impact the speedup by requiring global synchronization across multiprocessors (SMs) inside the GPU. To efficiently run applications with interblock data dependences, we need fine-granular task-based execution models that will treat SMs inside a GPU as stand-alone parallel processing units. Such a scheme will enable faster execution by utilizing all internal computation elements inside the GPU and eliminating unnecessary waits during device-wide global barriers. In this paper, we propose Juggler, a task-based execution scheme for GPU workloads with data dependences. The Juggler framework takes applications embedding OpenMP 4.5 tasks as input and executes them on the GPU via an efficient in-device runtime, hence eliminating the need for kernel-wide global synchronization. Juggler requires no or little modification to the source code, and once launched, the runtime entirely runs on the GPU without relying on the host through the entire execution. 
We have evaluated Juggler on an NVIDIA Tesla P100 GPU and obtained up to 31\% performance improvement against global barrier based implementation, with minimal runtime overhead.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "PPoPP '18 proceedings.", } @Article{Benedict:2018:SES, author = "Shajulin Benedict", title = "{SCALE-EA}: A Scalability Aware Performance Tuning Framework for {OpenMP} Applications", journal = j-SCPE, volume = "19", number = "1", pages = "15--30", month = "????", year = "2018", CODEN = "????", ISSN = "1895-1767", ISSN-L = "1895-1767", bibdate = "Mon Jan 7 06:46:50 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/scpe.bib", URL = "https://www.scpe.org/index.php/scpe/article/view/1390", acknowledgement = ack-nhfb, fjournal = "Scalable Computing: Practice and Experience", journal-URL = "http://www.scpe.org/", } @Article{Burtscher:2018:HQF, author = "Martin Burtscher and Sindhu Devale and Sahar Azimi and Jayadharini Jaiganesh and Evan Powers", title = "A High-Quality and Fast Maximal Independent Set Implementation for {GPUs}", journal = j-TOPC, volume = "5", number = "2", pages = "8:1--8:??", month = jan, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3291525", ISSN = "2329-4949 (print), 2329-4957 (electronic)", ISSN-L = "2329-4949", bibdate = "Wed Jan 23 16:12:26 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/topc.bib", abstract = "Computing a maximal independent set is an important step in many parallel graph algorithms. This article introduces ECL-MIS, a maximal independent set implementation that works well on GPUs. It includes key optimizations to speed up computation, reduce the memory footprint, and increase the set size. 
Its CUDA implementation requires fewer than 30 kernel statements, runs asynchronously, and produces a deterministic result. It outperforms the maximal independent set implementations of Pannotia, CUSP, and IrGL on each of the 16 tested graphs of various types and sizes. On a Titan X GPU, ECL-MIS is between 3.9 and 100 times faster (11.5 times, on average). ECL-MIS running on the GPU is also faster than the parallel CPU codes Ligra, Ligra+, and PBBS running on 20 Xeon cores, which it outperforms by 4.1 times, on average. At the same time, ECL-MIS produces maximal independent sets that are up to 52\% larger (over 10\%, on average) compared to these preexisting CPU and GPU implementations. Whereas these codes produce maximal independent sets that are, on average, about 15\% smaller than the largest possible such sets, ECL-MIS sets are less than 6\% smaller than the maximum independent sets.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on Parallel Computing", journal-URL = "http://dl.acm.org/citation.cfm?id=2632163", } @Article{Bylina:2018:EEO, author = "Beata Bylina and Jaroslaw Bylina", title = "An Experimental Evaluation of the {OpenMP} Thread Mapping for {LU} Factorisation on {Xeon Phi} Coprocessor and on Hybrid {CPU-MIC} Platform", journal = j-SCPE, volume = "19", number = "3", pages = "259--274", month = "????", year = "2018", CODEN = "????", ISSN = "1895-1767", ISSN-L = "1895-1767", bibdate = "Mon Jan 7 06:46:50 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/scpe.bib", URL = "https://www.scpe.org/index.php/scpe/article/view/1373", acknowledgement = ack-nhfb, fjournal = "Scalable Computing: Practice and Experience", journal-URL = "http://www.scpe.org/", } @Article{Castello:2018:EIR, author = "Adri{\'a}n Castell{\'o} and Antonio J. Pe{\~n}a and Rafael Mayo and Judit Planas and Enrique S. 
Quintana-Ort{\'{\i}} and Pavan Balaji",
  title =        "Exploring the interoperability of remote {GPGPU} virtualization using {rCUDA} and directive-based programming models",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "74",
  number =       "11",
  pages =        "5628--5642",
  month =        nov,
  year =         "2018",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-016-1791-y",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Thu Oct 10 15:31:09 MDT 2019",
  bibsource =    "http://link.springer.com/journal/11227/74/11; https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
}

@Article{Chen:2018:FOB,
  author =       "Cen Chen and Kenli Li and Aijia Ouyang and Keqin Li",
  title =        "{FlinkCL}: An {OpenCL}-Based In-Memory Computing Architecture on Heterogeneous {CPU--GPU} Clusters for Big Data",
  journal =      j-IEEE-TRANS-COMPUT,
  volume =       "67",
  number =       "12",
  pages =        "1765--1779",
  month =        dec,
  year =         "2018",
  CODEN =        "ITCOB4",
  DOI =          "https://doi.org/10.1109/TC.2018.2839719",
  ISSN =         "0018-9340 (print), 1557-9956 (electronic)",
  ISSN-L =       "0018-9340",
  bibdate =      "Thu Nov 8 07:18:03 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/ieeetranscomput2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://ieeexplore.ieee.org/document/8362980/",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Computers",
  journal-URL =  "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=12",
}

@Article{Clay:2018:GAP,
  author =       "M. P. Clay and D. Buaria and P. K. Yeung and T.
Gotoh", title = "{GPU} acceleration of a petascale application for turbulent mixing at high {Schmidt} number using {OpenMP 4.5}", journal = j-COMP-PHYS-COMM, volume = "228", number = "??", pages = "100--114", month = jul, year = "2018", CODEN = "CPHCBZ", DOI = "https://doi.org/10.1016/j.cpc.2018.02.020", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Thu May 31 14:21:46 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465518300596", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Cowles:2018:ISB, author = "Mary Kathryn Cowles and Stephen Bonett and Michael Seedorff", title = "Independent sampling for {Bayesian} normal conditional autoregressive models with {OpenCL} acceleration", journal = j-COMP-STAT, volume = "33", number = "1", pages = "159--177", month = mar, year = "2018", CODEN = "CSTAEB", DOI = "https://doi.org/10.1007/s00180-017-0752-0", ISSN = "0943-4062 (print), 1613-9658 (electronic)", ISSN-L = "0943-4062", bibdate = "Thu Jun 18 16:19:50 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/compstat.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/article/10.1007/s00180-017-0752-0", acknowledgement = ack-nhfb, ajournal = "Comp. Stat.", fjournal = "Computational Statistics", journal-URL = "http://link.springer.com/journal/180", } @Article{Davina:2018:MCP, author = "A. Lamas Davi{\~n}a and J. E. 
Roman", title = "{MPI-CUDA} parallel linear solvers for block-tridiagonal matrices in the context of {SLEPc}'s eigensolvers", journal = j-PARALLEL-COMPUTING, volume = "74", number = "??", pages = "118--135", month = "????", year = "2018", CODEN = "PACOEJ", DOI = "https://doi.org/10.1016/j.parco.2017.11.006", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Tue Apr 3 13:55:32 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819117301874", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Dieguez:2018:SLP, author = "Adri{\'a}n P{\'e}rez Di{\'e}guez and Margarita Amor and Jacobo Lobeiras and Ram{\'o}n Doallo", title = "Solving Large Problem Sizes of Index-Digit Algorithms on {GPU}: {FFT} and Tridiagonal System Solvers", journal = j-IEEE-TRANS-COMPUT, volume = "67", number = "1", pages = "86--101", month = jan, year = "2018", CODEN = "ITCOB4", DOI = "https://doi.org/10.1109/TC.2017.2723879", ISSN = "0018-9340 (print), 1557-9956 (electronic)", ISSN-L = "0018-9340", bibdate = "Thu Dec 14 07:11:27 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranscomput2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://ieeexplore.ieee.org/document/7970194/", acknowledgement = ack-nhfb, fjournal = "IEEE Transactions on Computers", journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=12", keywords = "Computer architecture; CUDA; CUSPARSE; FFT; GPU; Graphics processing units; Instruction sets; Kernel; medium problem sizes; Proposals; Signal processing algorithms; Synchronization; tridiagonal systems; tuning", } @Article{Eddelbuettel:2018:BRN, author = "Dirk Eddelbuettel", title = "Book Review: {Norman Matloff. 
\booktitle{Parallel Computing for Data Science: With Examples in R, C++, and CUDA}. Boca Raton: CRC Press}", journal = j-BIOMETRICS, volume = "74", number = "2", pages = "770--770", month = jun, year = "2018", CODEN = "BIOMB6", DOI = "https://doi.org/10.1111/biom.12896", ISSN = "0006-341X (print), 1541-0420 (electronic)", ISSN-L = "0006-341X", bibdate = "Thu Jun 25 10:48:44 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/biometrics2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/s-plus.bib", acknowledgement = ack-nhfb, ajournal = "Biometrics", fjournal = "Biometrics", journal-URL = "http://onlinelibrary.wiley.com/journal/10.1111/(ISSN)1541-0420", onlinedate = "26 June 2018", } @Article{Faraji:2018:DCG, author = "Iman Faraji and Ahmad Afsahi", title = "Design considerations for {GPU}-aware collective communications in {MPI}", journal = j-CCPE, volume = "30", number = "17", pages = "e4667:1--e4667:??", day = "10", month = sep, year = "2018", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.4667", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Thu Mar 28 08:07:51 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "18 May 2018", } @Article{Ferreira:2018:CMM, author = "Kurt B. Ferreira and Scott Levy and Kevin Pedretti and Ryan E. 
Grant", title = "Characterizing {MPI} matching via trace-based simulation", journal = j-PARALLEL-COMPUTING, volume = "77", number = "??", pages = "57--83", month = sep, year = "2018", CODEN = "PACOEJ", DOI = "https://doi.org/10.1016/j.parco.2018.05.005", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Mon Jan 7 15:25:20 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819118301467", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Gallardo:2018:EMM, author = "Esthela Gallardo and J{\'e}r{\^o}me Vienne and Leonardo Fialho and Patricia Teller and James Browne", title = "Employing {MPI\_T} in {MPI} Advisor to optimize application performance", journal = j-IJHPCA, volume = "32", number = "6", pages = "882--896", day = "1", month = nov, year = "2018", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342016684005", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Wed Oct 9 14:35:52 MDT 2019", bibsource = "http://hpc.sagepub.com/; https://www.math.utah.edu/pub/tex/bib/ijsa.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://journals.sagepub.com/doi/full/10.1177/1094342016684005", acknowledgement = ack-nhfb, fjournal = "International Journal of High Performance Computing Applications", journal-URL = "https://journals.sagepub.com/home/hpc", } @Article{Gerbessiotis:2018:SIS, author = "Alexandros V. 
Gerbessiotis", title = "A Study of Integer Sorting on Multicores", journal = j-PARALLEL-PROCESS-LETT, volume = "28", number = "04", pages = "??--??", month = dec, year = "2018", DOI = "https://doi.org/10.1142/S0129626418500147", ISSN = "0129-6264 (print), 1793-642X (electronic)", ISSN-L = "0129-6264", bibdate = "Mon Mar 29 12:30:05 MDT 2021", bibsource = "http://ejournals.wspc.com.sg/ppl/; https://www.math.utah.edu/pub/tex/bib/parallelprocesslett.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://www.worldscientific.com/doi/10.1142/S0129626418500147", abstract = "Integer sorting on multicores and GPUs can be realized by a variety of approaches that include variants of distribution-based methods such as radix-sort, comparison-oriented algorithms such as deterministic regular sampling and random sampling parallel sorting, and network-based algorithms such as Batcher's bitonic sorting algorithm. In this work we present an experimental study of integer sorting on multicore processors. We have implemented serial and parallel radix-sort for various radixes, deterministic regular oversampling, and random oversampling parallel sorting, including new variants of ours, and also some previously little explored or unexplored variants of bitonic-sort and odd-even transposition sort. The study uses multithreading and multiprocessing parallel programming libraries with the same C language code working under Open MPI, MulticoreBSP, and BSPlib. We first provide some general high-level observations on the performance of these implementations. If we can conclude anything is that accurate prediction of performance by taking into consideration architecture dependent features such as the structure and characteristics of multiple memory hierarchies is difficult and more often than not untenable. To some degree this is affected by the overhead imposed by the high-level library used in the programming effort. 
Another objective is to model the performance of these algorithms and their implementations under the MBSP (Multi-memory BSP) model. Despite the limitations mentioned above, we can still draw some reliable conclusions and reason about the performance of these implementations using the MBSP model, thus making MBSP useful and usable.", acknowledgement = ack-nhfb, fjournal = "Parallel Processing Letters", journal-URL = "http://www.worldscientific.com/loi/ppl", } @Article{Gerstenberger:2018:EHS, author = "Robert Gerstenberger and Maciej Besta and Torsten Hoefler", title = "Enabling highly scalable remote memory access programming with {MPI-3} one sided", journal = j-CACM, volume = "61", number = "10", pages = "106--113", month = oct, year = "2018", CODEN = "CACMA2", DOI = "https://doi.org/10.1145/3264413", ISSN = "0001-0782 (print), 1557-7317 (electronic)", ISSN-L = "0001-0782", bibdate = "Thu Sep 27 11:55:45 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/cacm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://cacm.acm.org/magazines/2018/10/231375/fulltext", abstract = "Modern high-performance networks offer remote direct memory access (RDMA) that exposes a process' virtual address space to other processes in the network. The Message Passing Interface (MPI) specification has recently been extended with a programming interface called MPI-3 Remote Memory Access (MPI-3 RMA) for efficiently exploiting state-of-the-art RDMA features. MPI-3 RMA enables a powerful programming model that alleviates many message passing downsides. In this work, we design and develop bufferless protocols that demonstrate how to implement this interface and support scaling to millions of cores with negligible memory consumption while providing highest performance and minimal overheads. 
To arm programmers, we provide a spectrum of performance models for RMA functions that enable rigorous mathematical analysis of application performance and facilitate the development of codes that solve given tasks within specified time and energy budgets. We validate the usability of our library and models with several application studies with up to half a million processes. In a wider sense, our work illustrates how to use RMA principles to accelerate computation- and data-intensive codes.", acknowledgement = ack-nhfb, fjournal = "Communications of the ACM", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J79", } @Article{Gianinazzi:2018:CAP, author = "Lukas Gianinazzi and Pavel Kalvoda and Alessandro {De Palma} and Maciej Besta and Torsten Hoefler", title = "Communication-avoiding parallel minimum cuts and connected components", journal = j-SIGPLAN, volume = "53", number = "1", pages = "219--232", month = jan, year = "2018", CODEN = "SINODQ", DOI = "https://doi.org/10.1145/3200691.3178504", ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)", ISSN-L = "0362-1340", bibdate = "Wed Oct 16 14:12:56 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib", abstract = "We present novel scalable parallel algorithms for finding global minimum cuts and connected components, which are important and fundamental problems in graph processing. To take advantage of future massively parallel architectures, our algorithms are communication-avoiding: they reduce the costs of communication across the network and the cache hierarchy. The fundamental technique underlying our work is the randomized sparsification of a graph: removing a fraction of graph edges, deriving a solution for such a sparsified graph, and using the result to obtain a solution for the original input. We design and implement sparsification with O (1) synchronization steps. 
Our global minimum cut algorithm decreases communication costs and computation compared to the state-of-the-art, while our connected components algorithm incurs few cache misses and synchronization steps. We validate our approach by evaluating MPI implementations of the algorithms on a petascale supercomputer. We also provide an approximate variant of the minimum cut algorithm and show that it approximates the exact solutions well while using a fraction of cores in a fraction of time.", acknowledgement = ack-nhfb, fjournal = "ACM SIGPLAN Notices", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706", remark = "PPoPP '18 proceedings.", } @Article{Goglin:2018:HTM, author = "Brice Goglin and Emmanuel Jeannot and Farouk Mansouri and Guillaume Mercier", title = "Hardware topology management in {MPI} applications through hierarchical communicators", journal = j-PARALLEL-COMPUTING, volume = "76", number = "??", pages = "70--90", month = aug, year = "2018", CODEN = "PACOEJ", DOI = "https://doi.org/10.1016/j.parco.2018.05.006", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Mon Jun 4 07:40:18 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819118301480", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Gomez-Folgar:2018:MPA, author = "F. Gomez-Folgar and G. Indalecio and N. Seoane and T. F. Pena and A. J. 
Garcia-Loureiro", title = "{MPI-Performance-Aware-Reallocation}: method to optimize the mapping of processes applied to a cloud infrastructure", journal = j-COMPUTING, volume = "100", number = "2", pages = "211--226", month = feb, year = "2018", CODEN = "CMPTA2", DOI = "https://doi.org/10.1007/s00607-017-0573-6", ISSN = "0010-485X (print), 1436-5057 (electronic)", ISSN-L = "0010-485X", bibdate = "Wed Nov 7 08:19:16 MST 2018", bibsource = "http://link.springer.com/journal/607/100/2; https://www.math.utah.edu/pub/tex/bib/computing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Computing", journal-URL = "http://link.springer.com/journal/607", } @Article{Gonzalez-Dominguez:2018:MPC, author = "Jorge Gonzalez-Dominguez and Maria J. Martin", title = "{MPIGeneNet}: Parallel Calculation of Gene Co-Expression Networks on Multicore Clusters", journal = j-TCBB, volume = "15", number = "5", pages = "1732--1737", month = sep, year = "2018", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2017.2761340", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Thu Nov 8 06:18:46 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In this work, we present MPIGeneNet, a parallel tool that applies Pearson's correlation and Random Matrix Theory to construct gene co-expression networks. It is based on the state-of-the-art sequential tool RMTGeneNet, which provides networks with high robustness and sensitivity at the expenses of relatively long runtimes for large scale input datasets. MPIGeneNet returns the same results as RMTGeneNet but improves the memory management, reduces the I/O cost, and accelerates the two most computationally demanding steps of co-expression network construction by exploiting the compute capabilities of common multicore CPU clusters. 
Our performance evaluation on two different systems using three typical input datasets shows that MPIGeneNet is significantly faster than RMTGeneNet. As an example, our tool is up to 175.41 times faster on a cluster with eight nodes, each one containing two 12-core Intel Haswell processors. The source code of MPIGeneNet, as well as a reference manual, are available at https://sourceforge.net/projects/mpigenenet/.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Gupta:2018:ALQ, author = "Sourendu Gupta and Pushan Majumdar", title = "Accelerating lattice {QCD} simulations with 2 flavors of staggered fermions on multiple {GPUs} using {OpenACC} --- a first attempt", journal = j-COMP-PHYS-COMM, volume = "228", number = "??", pages = "44--53", month = jul, year = "2018", CODEN = "CPHCBZ", DOI = "https://doi.org/10.1016/j.cpc.2018.03.008", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Thu May 31 14:21:46 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465518300808", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Halver:2018:FPM, author = "Rene Halver and Wilhelm Homberg and Godehard Sutmann", title = "Function portability of molecular dynamics on heterogeneous parallel architectures with {OpenCL}", journal = j-J-SUPERCOMPUTING, volume = "74", number = "4", pages = "1522--1533", month = apr, year = "2018", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-017-2232-2", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Thu Oct 10 15:31:11 MDT 2019", bibsource = "http://link.springer.com/journal/11227/74/4; 
https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
}

@Article{Huang:2018:ACO,
  author =       "Kai Huang and Biao Hu and Long Chen and Alois Knoll and Zhihua Wang",
  title =        "{ADAS} on {COTS} with {OpenCL}: A Case Study with Lane Detection",
  journal =      j-IEEE-TRANS-COMPUT,
  volume =       "67",
  number =       "4",
  pages =        "559--565",
  month =        apr,
  year =         "2018",
  CODEN =        "ITCOB4",
  DOI =          "https://doi.org/10.1109/TC.2017.2759203",
  ISSN =         "0018-9340 (print), 1557-9956 (electronic)",
  ISSN-L =       "0018-9340",
  bibdate =      "Thu Mar 15 08:52:31 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/ieeetranscomput2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://ieeexplore.ieee.org/document/8057795/",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Computers",
  journal-URL =  "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=12",
}

@Article{Imbernon:2018:ELS,
  author =       "Baldomero Imbern{\'o}n and Javier Prades and Domingo Gim{\'e}nez and Jos{\'e} M.
Cecilia and Federico Silla", title = "Enhancing large-scale docking simulation on heterogeneous systems: An {MPI} vs {rCUDA} study", journal = j-FUT-GEN-COMP-SYS, volume = "79 (part 1)", number = "??", pages = "26--37", year = "2018", CODEN = "FGSEVI", DOI = "https://doi.org/10.1016/j.future.2017.08.050", ISSN = "0167-739X (print), 1872-7115 (electronic)", ISSN-L = "0167-739X", bibdate = "Fri Nov 24 15:16:17 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/futgencompsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://www.sciencedirect.com/science/article/pii/S0167739X17309974", acknowledgement = ack-nhfb, fjournal = "Future Generation Computer Systems", journal-URL = "http://www.sciencedirect.com/science/journal/0167739X", keywords = "Heterogeneous computing; HPC; Metaheuristics; rCUDA; Virtual screening", } @Article{Jambunathan:2018:COB, author = "Revathi Jambunathan and Deborah A. Levin", title = "{CHAOS}: an octree-based {PIC--DSMC} code for modeling of electron kinetic properties in a plasma plume using {MPI--CUDA} parallelization", journal = j-J-COMPUT-PHYS, volume = "373", number = "??", pages = "571--604", day = "15", month = nov, year = "2018", CODEN = "JCTPAH", DOI = "https://doi.org/10.1016/j.jcp.2018.07.005", ISSN = "0021-9991 (print), 1090-2716 (electronic)", ISSN-L = "0021-9991", bibdate = "Thu Sep 20 17:02:49 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/jcomputphys2015.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0021999118304601", acknowledgement = ack-nhfb, fjournal = "Journal of Computational Physics", journal-URL = "http://www.sciencedirect.com/science/journal/00219991", } @Article{Kamburugamuve:2018:AML, author = "Supun Kamburugamuve and Pulasthi Wickramasinghe and Saliya Ekanayake and Geoffrey C. 
Fox",
  title =        "Anatomy of machine learning algorithm implementations
                 in {MPI}, {Spark}, and {Flink}",
  journal =      j-IJHPCA,
  volume =       "32",
  number =       "1",
  pages =        "61--73",
  month =        jan,
  year =         "2018",
  CODEN =        "IHPCFL",
  ISSN =         "1094-3420 (print), 1741-2846 (electronic)",
  ISSN-L =       "1094-3420",
  bibdate =      "Sat Jan 6 10:32:00 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/ijsa.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of High Performance Computing
                 Applications",
  journal-URL =  "http://hpc.sagepub.com/content/by/year",
}

@Article{Kang:2018:PRS,
  author =       "Zhijiang Kang and Ze Deng and Wei Han and Dongmei
                 Zhang",
  title =        "Parallel Reservoir Simulation with {OpenACC} and Domain
                 Decomposition",
  journal =      j-ALGORITHMS-BASEL,
  volume =       "11",
  number =       "12",
  month =        dec,
  year =         "2018",
  CODEN =        "ALGOCH",
  DOI =          "https://doi.org/10.3390/a11120213",
  ISSN =         "1999-4893 (electronic)",
  ISSN-L =       "1999-4893",
  bibdate =      "Fri May 3 14:18:56 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/algorithms.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://www.mdpi.com/1999-4893/11/12/213",
  acknowledgement = ack-nhfb,
  articleno =    "213",
  fjournal =     "Algorithms (Basel)",
  journal-URL =  "https://www.mdpi.com/journal/algorithms",
  pagecount =    "??",
  pubdates =     "Received: 16 November 2018 / Revised: 5 December 2018 /
                 Accepted: 14 December 2018 / Published: 18 December
                 2018",
}

@Article{Kono:2018:EOW,
  author =       "Fumiya Kono and Naohito Nakasato and Kensaku Hayashi
                 and Alexander Vazhenin and Stanislav Sedukhin",
  title =        "Evaluations of {OpenCL-written} tsunami simulation on
                 {FPGA} and comparison with {GPU} implementation",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "74",
  number =       "6",
  pages =        "2747--2775",
  month =        jun,
  year =         "2018",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-018-2315-8",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Thu Oct 10
15:31:12 MDT 2019",
  bibsource =    "http://link.springer.com/journal/11227/74/6;
                 https://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
}

@Article{Kotsifakou:2018:HHP,
  author =       "Maria Kotsifakou and Prakalp Srivastava and Matthew D.
                 Sinclair and Rakesh Komuravelli and Vikram Adve and
                 Sarita Adve",
  title =        "{HPVM}: heterogeneous parallel virtual machine",
  journal =      j-SIGPLAN,
  volume =       "53",
  number =       "1",
  pages =        "68--80",
  month =        jan,
  year =         "2018",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/3200691.3178493",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Wed Oct 16 14:12:56 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  abstract =     "We propose a parallel program representation for
                 heterogeneous systems, designed to enable performance
                 portability across a wide range of popular parallel
                 hardware, including GPUs, vector instruction sets,
                 multicore CPUs and potentially FPGAs. Our
                 representation, which we call HPVM, is a hierarchical
                 dataflow graph with shared memory and vector
                 instructions. HPVM supports three important
                 capabilities for programming heterogeneous systems: a
                 compiler intermediate representation (IR), a virtual
                 instruction set (ISA), and a basis for runtime
                 scheduling; previous systems focus on only one of these
                 capabilities. As a compiler IR, HPVM aims to enable
                 effective code generation and optimization for
                 heterogeneous systems. As a virtual ISA, it can be used
                 to ship executable programs, in order to achieve both
                 functional portability and performance portability
                 across such systems.
At runtime, HPVM enables flexible scheduling policies, both
                 through the graph structure and the ability to compile
                 individual nodes in a program to any of the target
                 devices on a system. We have implemented a prototype
                 HPVM system, defining the HPVM IR as an extension of
                 the LLVM compiler IR, compiler optimizations that
                 operate directly on HPVM graphs, and code generators
                 that translate the virtual ISA to NVIDIA GPUs, Intel's
                 AVX vector units, and to multicore X86-64 processors.
                 Experimental results show that HPVM optimizations
                 achieve significant performance improvements, HPVM
                 translators achieve performance competitive with
                 manually developed OpenCL code for both GPUs and vector
                 hardware, and that runtime scheduling policies can make
                 use of both program and runtime information to exploit
                 the flexible compilation capabilities. Overall, we
                 conclude that the HPVM representation is a promising
                 basis for achieving performance portability and for
                 implementing parallelizing compilers for heterogeneous
                 parallel systems.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "PPoPP '18 proceedings.",
}

@Article{Li:2018:CER,
  author =       "Xiangbo Li and Mohsen Amini Salehi and Magdy Bayoumi
                 and Nian-Feng Tzeng and Rajkumar Buyya",
  title =        "Cost-Efficient and Robust On-Demand Video Transcoding
                 Using Heterogeneous Cloud Services",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "29",
  number =       "3",
  pages =        "556--571",
  month =        "????",
  year =         "2018",
  CODEN =        "ITDSEO",
  DOI =          "https://doi.org/10.1109/TPDS.2017.2766069",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Thu Feb 15 06:03:25 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://ieeexplore.ieee.org/document/8081853/",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed
                 Systems",
journal-URL =  "https://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=71",
}

@Article{Li:2018:COM,
  author =       "Shigang Li and Yunquan Zhang and Torsten Hoefler",
  title =        "Cache-Oblivious {MPI} All-to-All Communications Based
                 on {Morton} Order",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "29",
  number =       "3",
  pages =        "542--555",
  month =        "????",
  year =         "2018",
  CODEN =        "ITDSEO",
  DOI =          "https://doi.org/10.1109/TPDS.2017.2768413",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Thu Feb 15 06:03:25 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://ieeexplore.ieee.org/document/8091010/",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed
                 Systems",
  journal-URL =  "http://www.computer.org/portal/web/csdl/transactions/tpds",
}

@Article{Liang:2018:FMP,
  author =       "Yun Liang and Shuo Wang and Wei Zhang",
  title =        "{FlexCL}: A Model of Performance and Power for {OpenCL}
                 Workloads on {FPGAs}",
  journal =      j-IEEE-TRANS-COMPUT,
  volume =       "67",
  number =       "12",
  pages =        "1750--1764",
  month =        "????",
  year =         "2018",
  CODEN =        "ITCOB4",
  DOI =          "https://doi.org/10.1109/TC.2018.2840686",
  ISSN =         "0018-9340 (print), 1557-9956 (electronic)",
  ISSN-L =       "0018-9340",
  bibdate =      "Thu Nov 8 07:18:03 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/ieeetranscomput2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://ieeexplore.ieee.org/document/8365849/",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Computers",
  journal-URL =  "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=12",
}

@Article{Lin:2018:CHM,
  author =       "Han Lin and Zhichao Su and Xiandong Meng and Xu Jin and
                 Zhong Wang and Wenting Han and Hong An and Mengxian Chi
                 and Zheng Wu",
  title =        "Combining {Hadoop} with {MPI} to Solve Metagenomics
                 Problems that are both Data- and Compute-intensive",
  journal =      j-INT-J-PARALLEL-PROG,
volume =       "46",
  number =       "4",
  pages =        "762--775",
  month =        aug,
  year =         "2018",
  CODEN =        "IJPPE5",
  DOI =          "https://doi.org/10.1007/s10766-017-0524-z",
  ISSN =         "0885-7458 (print), 1573-7640 (electronic)",
  ISSN-L =       "0885-7458",
  bibdate =      "Fri Oct 11 08:37:50 MDT 2019",
  bibsource =    "http://link.springer.com/journal/10766/46/4;
                 https://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of Parallel Programming",
  journal-URL =  "http://link.springer.com/journal/10766",
}

@InProceedings{Malakhov:2018:CMT,
  author =       "Anton Malakhov and David Liu and Anton Gorshkov and
                 Terry Wilmarth",
  editor =       "Fatih Akici and David Lippa and Dillon Niederhut and M.
                 Pacer",
  booktitle =    "Proceedings of the {17th Python in Science Conference,
                 Austin, TX, 9--15 July 2018}",
  title =        "Composable Multi-Threading and Multi-Processing for
                 Numeric Libraries",
  publisher =    "????",
  address =      "????",
  pages =        "15--21",
  year =         "2018",
  bibdate =      "Wed Aug 1 09:03:36 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/python.bib",
  URL =          "http://conference.scipy.org/proceedings/scipy2018/anton_malakhov.html",
  abstract =     "Python is popular among scientific communities that
                 value its simplicity and power, especially as it comes
                 along with numeric libraries such as NumPy, SciPy,
                 Dask, and Numba. As CPU core counts keep increasing,
                 these modules can make use of many cores via
                 multi-threading for efficient multi-core parallelism.
                 However, threads can interfere with each other leading
                 to overhead and inefficiency if used together in a
                 single application on machines with a large number of
                 cores. This performance loss can be prevented if all
                 multi-threaded modules are coordinated. This paper
                 continues the work started in AMala16 by introducing
                 more approaches to coordination for both
                 multi-threading and multi-processing cases.
In particular, we investigate the use of static settings,
                 limiting the number of simultaneously active OpenMP
                 parallel regions, and optional parallelism with Intel
                 Threading Building Blocks (Intel TBB). We will show how
                 these approaches help to unlock additional performance
                 for numeric applications on multi-core systems.",
  acknowledgement = ack-nhfb,
  keywords =     "Dask; GIL; Joblib; Multi-core; Multi-processing;
                 Multi-threading; Nested Parallelism; NumPy; OpenMP;
                 Oversubscription; Parallel Computations; Python; SciPy;
                 TBB",
}

@Article{Maleki:2018:AHP,
  author =       "Sepideh Maleki and Martin Burtscher",
  title =        "Automatic Hierarchical Parallelization of Linear
                 Recurrences",
  journal =      j-SIGPLAN,
  volume =       "53",
  number =       "2",
  pages =        "128--138",
  month =        feb,
  year =         "2018",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/3296957.3173168",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Wed Oct 16 14:12:56 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "Linear recurrences encompass many fundamental
                 computations including prefix sums and digital filters.
                 Later result values depend on earlier result values in
                 recurrences, making it a challenge to compute them in
                 parallel. We present a new work- and space-efficient
                 algorithm to compute linear recurrences that is
                 amenable to automatic parallelization and suitable for
                 hierarchical massively-parallel architectures such as
                 GPUs. We implemented our approach in a domain-specific
                 code generator that emits optimized CUDA code. Our
                 evaluation shows that, for standard prefix sums and
                 single-stage IIR filters, the generated code reaches
                 the throughput of memory copy for large inputs, which
                 cannot be surpassed. On higher-order prefix sums, it
                 performs nearly as well as the fastest handwritten code
                 from the literature.
On tuple-based prefix sums and digital filters, our automatically
                 parallelized code outperforms the fastest prior
                 implementations.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "ASPLOS '18 proceedings.",
}

@Article{Malinowski:2018:SIP,
  author =       "Artur Malinowski and Pawel Czarnul",
  title =        "A Solution to Image Processing with Parallel {MPI}
                 {I/O} and Distributed {NVRAM} Cache",
  journal =      j-SCPE,
  volume =       "19",
  number =       "1",
  pages =        "1--14",
  month =        "????",
  year =         "2018",
  CODEN =        "????",
  ISSN =         "1895-1767",
  ISSN-L =       "1895-1767",
  bibdate =      "Mon Jan 7 06:46:50 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/scpe.bib",
  URL =          "https://www.scpe.org/index.php/scpe/article/view/1389",
  acknowledgement = ack-nhfb,
  fjournal =     "Scalable Computing: Practice and Experience",
  journal-URL =  "http://www.scpe.org/",
}

@Article{Moll:2018:PCF,
  author =       "Simon Moll and Sebastian Hack",
  title =        "Partial control-flow linearization",
  journal =      j-SIGPLAN,
  volume =       "53",
  number =       "4",
  pages =        "543--556",
  month =        apr,
  year =         "2018",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/3296979.3192413",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Wed Oct 16 14:12:57 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "If-conversion is a fundamental technique for
                 vectorization. It accounts for the fact that in a SIMD
                 program, several targets of a branch might be executed
                 because of divergence. Especially for irregular
                 data-parallel workloads, it is crucial to avoid
                 if-converting non-divergent branches to increase SIMD
                 utilization. In this paper, we present partial
                 linearization, a simple and efficient if-conversion
                 algorithm that overcomes several limitations of
                 existing if-conversion techniques.
In contrast to prior work, it has provable guarantees on which
                 non-divergent branches are retained and will never
                 duplicate code or insert additional branches. We show
                 how our algorithm can be used in a classic loop
                 vectorizer as well as to implement data-parallel
                 languages such as ISPC or OpenCL. Furthermore, we
                 implement prior vectorizer optimizations on top of
                 partial linearization in a more general way. We
                 evaluate the implementation of our algorithm in LLVM on
                 a range of irregular data analytics kernels, a
                 neutronics simulation benchmark and NAB, a molecular
                 dynamics benchmark from SPEC2017 on AVX2, AVX512, and
                 ARM Advanced SIMD machines and report speedups of up to
                 146 \% over ICC, GCC and Clang O3.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "PLDI '18 proceedings.",
}

@Article{Monteiro:2018:EGC,
  author =       "Felipe R. Monteiro and Erickson H. da S. Alves and
                 Isabela S. Silva and Hussama I. Ismail and Lucas C.
                 Cordeiro and Eddie B.
de Lima Filho",
  title =        "{ESBMC-GPU}: a context-bounded model checking tool to
                 verify {CUDA} programs",
  journal =      j-SCI-COMPUT-PROGRAM,
  volume =       "152",
  number =       "??",
  pages =        "63--69",
  day =          "15",
  month =        jan,
  year =         "2018",
  CODEN =        "SCPGD4",
  ISSN =         "0167-6423 (print), 1872-7964 (electronic)",
  ISSN-L =       "0167-6423",
  bibdate =      "Sat Dec 2 17:23:38 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/scicomputprogram.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167642317301934",
  acknowledgement = ack-nhfb,
  fjournal =     "Science of Computer Programming",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01676423",
}

@Article{Peng:2018:CDC,
  author =       "Yuanfeng Peng and Vinod Grover and Joseph Devietti",
  title =        "{CURD}: a dynamic {CUDA} race detector",
  journal =      j-SIGPLAN,
  volume =       "53",
  number =       "4",
  pages =        "390--403",
  month =        apr,
  year =         "2018",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/3296979.3192368",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Wed Oct 16 14:12:57 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "As GPUs have become an integral part of nearly every
                 processor, GPU programming has become increasingly
                 popular. GPU programming requires a combination of
                 extreme levels of parallelism and low-level
                 programming, making it easy for concurrency bugs such
                 as data races to arise. These concurrency bugs can be
                 extremely subtle and difficult to debug due to the
                 massive numbers of threads running concurrently on a
                 modern GPU. While some tools exist to detect data races
                 in GPU programs, they are often prohibitively slow or
                 focused only on a small class of data races in shared
                 memory.
Compared to prior work, our race detector, CURD, can detect data
                 races precisely on both shared and global memory,
                 selects an appropriate race detection algorithm based
                 on the synchronization used in a program, and utilizes
                 efficient compiler instrumentation to reduce
                 performance overheads. Across 53 benchmarks, we find
                 that using CURD incurs an average slowdown of just
                 2.88x over native execution. CURD is 2.1x faster than
                 Nvidia's CUDA-Racecheck race detector, despite
                 detecting a much broader class of races. CURD finds 35
                 races across our benchmarks, including bugs in
                 established benchmark suites and in sample programs
                 from Nvidia.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "PLDI '18 proceedings.",
}

@Article{Pessoa:2018:GAB,
  author =       "Tiago Carneiro Pessoa and Jan Gmys and Francisco Heron
                 de Carvalho J{\'u}nior and Nouredine Melab and Daniel
                 Tuyttens",
  title =        "{GPU}-accelerated backtracking using {CUDA Dynamic
                 Parallelism}",
  journal =      j-CCPE,
  volume =       "30",
  number =       "9",
  pages =        "??--??",
  day =          "10",
  month =        may,
  year =         "2018",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.4374",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Sat Aug 4 10:03:13 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://onlinelibrary.wiley.com/doi/abs/10.1002/cpe.4374",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
}

@Article{Pierro:2018:SFP,
  author =       "Vincenzo Pierro and Luigi Troiano and Elena Mejuto and
                 Giovanni Filatrella",
  title =        "Stochastic first passage time accelerated with
                 {CUDA}",
  journal =      j-J-COMPUT-PHYS,
  volume =       "361",
  number =       "??",
  pages =        "136--149",
  day =          "15",
  month =        may,
  year =         "2018",
  CODEN =        "JCTPAH",
  DOI =          "https://doi.org/10.1016/j.jcp.2018.01.039",
  ISSN =         "0021-9991 (print), 1090-2716 (electronic)",
  ISSN-L =       "0021-9991",
  bibdate =      "Wed Mar 21 16:14:42 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jcomputphys2015.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0021999118300494",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Computational Physics",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00219991",
}

@Article{Pinho:2018:CTM,
  author =       "Luis Miguel Pinho and Eduardo Qui{\~n}ones and Sara
                 Royuela",
  title =        "Combining the tasklet model with {OpenMP}",
  journal =      j-SIGADA-LETTERS,
  volume =       "38",
  number =       "1",
  pages =        "14--18",
  month =        jun,
  year =         "2018",
  CODEN =        "AALEE5",
  DOI =          "https://doi.org/10.1145/3241950.3241952",
  ISSN =         "0736-721X",
  bibdate =      "Sat Oct 19 17:57:55 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/sigada.bib",
  abstract =     "Previous workshops have discussed a proposal to augment
                 Ada with fine-grained parallelism, based on the notion
                 of tasklets, a lightweight parallel entity. Recent
                 works have shown the convergence of this model with the
                 OpenMP tasking model and have proposed their
                 coexistence.
In this paper we provide a status of the existent works, and
                 describe how these models could be combined.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGADA Ada Letters",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J32",
}

@Article{Poirier:2018:DAB,
  author =       "Carl Poirier and Benoit Gosselin and Paul Fortier",
  title =        "{DNA} Assembly with {de Bruijn} Graphs Using an {FPGA}
                 Platform",
  journal =      j-TCBB,
  volume =       "15",
  number =       "3",
  pages =        "1003--1009",
  month =        may,
  year =         "2018",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2017.2696522",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sat Jun 30 09:34:37 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "This paper presents an FPGA implementation of a DNA
                 assembly algorithm, called Ray, initially developed to
                 run on parallel CPUs. The OpenCL language is used and
                 the focus is placed on modifying and optimizing the
                 original algorithm to better suit the new
                 parallelization tool and the radically different
                 hardware architecture.
The results show that the execution time is roughly one fourth
                 that of the CPU and factoring energy consumption yields
                 a tenfold savings.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Prabhu:2018:DRC,
  author =       "Tarun Prabhu and William Gropp",
  title =        "{DAME}: Runtime-compilation for data movement",
  journal =      j-IJHPCA,
  volume =       "32",
  number =       "5",
  pages =        "760--774",
  year =         "2018",
  CODEN =        "IHPCFL",
  DOI =          "https://doi.org/10.1177/1094342017695444",
  ISSN =         "1094-3420 (print), 1741-2846 (electronic)",
  ISSN-L =       "1094-3420",
  bibdate =      "Mon Nov 5 17:34:17 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/ijsa.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://journals.sagepub.com/doi/full/10.1177/1094342017695444",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of High Performance Computing
                 Applications",
  journal-URL =  "https://journals.sagepub.com/home/hpc",
  xxmonth =      sep,
}

@Article{Ramesh:2018:MPE,
  author =       "Srinivasan Ramesh and Aur{\`e}le Mah{\'e}o and Sameer
                 Shende and Allen D. Malony and Hari Subramoni and Amit
                 Ruhela and Dhabaleswar K.
(DK) Panda",
  title =        "{MPI} performance engineering with the {MPI} tool
                 interface: the integration of {MVAPICH} and {TAU}",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "77",
  number =       "??",
  pages =        "19--37",
  month =        sep,
  year =         "2018",
  CODEN =        "PACOEJ",
  DOI =          "https://doi.org/10.1016/j.parco.2018.05.003",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Mon Jan 7 15:25:20 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819118301479",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}

@Article{Rasch:2018:MDH,
  author =       "Ari Rasch and Sergei Gorlatch",
  title =        "Multi-dimensional Homomorphisms and Their
                 Implementation in {OpenCL}",
  journal =      j-INT-J-PARALLEL-PROG,
  volume =       "46",
  number =       "1",
  pages =        "101--119",
  month =        feb,
  year =         "2018",
  CODEN =        "IJPPE5",
  DOI =          "https://doi.org/10.1007/s10766-017-0508-z",
  ISSN =         "0885-7458 (print), 1573-7640 (electronic)",
  ISSN-L =       "0885-7458",
  bibdate =      "Sun Feb 11 08:41:13 MST 2018",
  bibsource =    "http://link.springer.com/journal/10766/46/1;
                 https://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of Parallel Programming",
  journal-URL =  "http://link.springer.com/journal/10766",
}

@Article{Riebler:2018:ACA,
  author =       "Heinrich Riebler and Gavin Vaz and Tobias Kenter and
                 Christian Plessl",
  title =        "Automated code acceleration targeting heterogeneous
                 {OpenCL} devices",
  journal =      j-SIGPLAN,
  volume =       "53",
  number =       "1",
  pages =        "417--418",
  month =        jan,
  year =         "2018",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/3200691.3178534",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Wed Oct 16 14:12:56 MDT 2019",
bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "Accelerators can offer exceptional performance
                 advantages. However, programmers need to spend
                 considerable efforts on acceleration, without knowing
                 how sustainable the employed programming models,
                 languages and tools are. To tackle this challenge, we
                 propose and demonstrate a new runtime system called
                 HTrOP that is able to automatically generate and
                 execute OpenCL code from sequential CPU code. HTrOP
                 transforms suitable data-parallel loops into
                 independent OpenCL-typical work-items and handles
                 concrete calls to these devices through a mix of
                 library components and application-specific OpenCL host
                 code. Computational hotspots are identified and can be
                 offloaded to different resources (CPU, GPGPU and Xeon
                 Phi). We demonstrate the potential of HTrOP on a broad
                 set of applications and are able to improve the
                 performance by 4.3X on average.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "PPoPP '18 proceedings.",
}

@Article{Rivas-Gomez:2018:MWS,
  author =       "Sergio Rivas-Gomez and Roberto Gioiosa and Ivy Bo Peng
                 and Gokcen Kestor and Sai Narasimhamurthy and Erwin
                 Laure and Stefano Markidis",
  title =        "{MPI} windows on storage for {HPC} applications",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "77",
  number =       "??",
  pages =        "38--56",
  month =        sep,
  year =         "2018",
  CODEN =        "PACOEJ",
  DOI =          "https://doi.org/10.1016/j.parco.2018.05.007",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Mon Jan 7 15:25:20 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819118301571",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}

@Article{Rucci:2018:OOS,
  author =       "Enzo Rucci and Carlos Garcia and Guillermo Botella and
                 Armando E. {De Giusti} and Marcelo Naiouf and Manuel
                 Prieto-Matias",
  title =        "{OSWALD}: {OpenCL} {Smith--Waterman} on {Altera}'s
                 {FPGA} for Large Protein Databases",
  journal =      j-IJHPCA,
  volume =       "32",
  number =       "3",
  pages =        "337--350",
  month =        may,
  year =         "2018",
  CODEN =        "IHPCFL",
  ISSN =         "1094-3420 (print), 1741-2846 (electronic)",
  ISSN-L =       "1094-3420",
  bibdate =      "Mon Nov 5 17:34:16 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/ijsa.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of High Performance Computing
                 Applications",
  journal-URL =  "http://hpc.sagepub.com/content/by/year",
}

@Article{Schmitt:2018:RHG,
  author =       "Christian Schmitt and Moritz Schmid and Sebastian
                 Kuckuk and Harald K{\"o}stler and J{\"u}rgen Teich and
                 Frank Hannig",
  title =        "Reconfigurable Hardware Generation of Multigrid Solvers
                 with Conjugate Gradient Coarse-Grid Solution",
  journal =      j-PARALLEL-PROCESS-LETT,
  volume =       "28",
  number =       "04",
  pages =        "??--??",
  month =        dec,
  year =         "2018",
  DOI =          "https://doi.org/10.1142/S0129626418500160",
  ISSN =         "0129-6264 (print), 1793-642X (electronic)",
  ISSN-L =       "0129-6264",
  bibdate =      "Mon Mar 29 12:30:05 MDT 2021",
  bibsource =    "http://ejournals.wspc.com.sg/ppl/;
                 https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                 https://www.math.utah.edu/pub/tex/bib/parallelprocesslett.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://www.worldscientific.com/doi/10.1142/S0129626418500160",
  abstract =     "Not only in the field of high-performance computing
                 (HPC), field programmable gate arrays (FPGAs) are a
                 soaringly popular accelerator technology.
However, they use a completely different programming paradigm and
                 tool set compared to central processing units (CPUs) or
                 even graphics processing units (GPUs), adding extra
                 development steps and requiring special knowledge,
                 hindering widespread use in scientific computing. To
                 bridge this programmability gap, domain-specific
                 languages (DSLs) are a popular choice to generate
                 low-level implementations from an abstract algorithm
                 description. In this work, we demonstrate our approach
                 for the generation of numerical solver implementations
                 based on the multigrid method for FPGAs from the same
                 code base that is also used to generate code for CPUs
                 using a hybrid parallelization of MPI and OpenMP. Our
                 approach yields in a hardware design that can compute
                 up to 11 V-cycles per second with an input grid size of
                 4096 {\texttimes} 4096 and solution on the coarsest
                 using the conjugate gradient (CG) method on a mid-range
                 FPGA, beating vectorized, multi-threaded execution on
                 an Intel Xeon processor.",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Processing Letters",
  journal-URL =  "http://www.worldscientific.com/loi/ppl",
}

@Article{Si:2018:DAA,
  author =       "Min Si and Antonio J.
Pena and Jeff Hammond and Pavan Balaji and Masamichi Takagi and
                 Yutaka Ishikawa",
  title =        "Dynamic Adaptable Asynchronous Progress Model for {MPI}
                 {RMA} Multiphase Applications",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "29",
  number =       "9",
  pages =        "1975--1989",
  month =        sep,
  year =         "2018",
  CODEN =        "ITDSEO",
  DOI =          "https://doi.org/10.1109/TPDS.2018.2815568",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Thu Aug 9 10:52:00 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://www.computer.org/csdl/trans/td/2018/09/08315136-abs.html",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed
                 Systems",
  journal-URL =  "http://www.computer.org/tpds/archives.htm",
}

@Article{Snir:2018:FMT,
  author =       "Marc Snir",
  title =        "The future of {MPI}: technical perspective",
  journal =      j-CACM,
  volume =       "61",
  number =       "10",
  pages =        "105--105",
  month =        oct,
  year =         "2018",
  CODEN =        "CACMA2",
  DOI =          "https://doi.org/10.1145/3264415",
  ISSN =         "0001-0782 (print), 1557-7317 (electronic)",
  ISSN-L =       "0001-0782",
  bibdate =      "Thu Sep 27 11:55:45 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cacm2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://cacm.acm.org/magazines/2018/10/231376/fulltext",
  acknowledgement = ack-nhfb,
  fjournal =     "Communications of the ACM",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J79",
}

@Article{Sojka:2018:IEM,
  author =       "Radim Sojka and David Hor{\'a}k and V{\'a}clav Hapla
                 and Martin Cerm{\'a}k",
  title =        "The impact of enabling multiple subdomains per {MPI}
                 process in the {TFETI} domain decomposition method",
  journal =      j-APPL-MATH-COMP,
  volume =       "319",
  number =       "??",
  pages =        "586--597",
  day =          "15",
  month =        feb,
  year =         "2018",
  CODEN =        "AMHCBQ",
  DOI =          "https://doi.org/10.1016/j.amc.2017.07.031",
  ISSN =         "0096-3003 (print), 1873-5649 (electronic)",
  ISSN-L =       "0096-3003",
  bibdate =      "Wed Nov 15 17:37:14 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/applmathcomput2015.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0096300317304927",
  acknowledgement = ack-nhfb,
  fjournal =     "Applied Mathematics and Computation",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00963003",
}

@Article{Sotiriou-Xanthopoulos:2018:OBV,
  author =       "Efstathios Sotiriou-Xanthopoulos and Leonard Masing and
                 Sotirios Xydis and Kostas Siozios and J{\"u}rgen Becker
                 and Dimitrios Soudris",
  title =        "{OpenCL}-based Virtual Prototyping and Simulation of
                 Many-Accelerator Architectures",
  journal =      j-TECS,
  volume =       "17",
  number =       "5",
  pages =        "86:1--86:??",
  month =        nov,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3242179",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:41 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib;
                 https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3242179",
  abstract =     "Heterogeneous architectures featuring multiple hardware
                 accelerators have been proposed as a promising solution
                 for meeting the ever-increasing performance and power
                 requirements of embedded systems. However, the
                 existence of numerous design parameters may result in
                 different architectural schemes and thus in extra
                 design effort. To address this issue, OpenCL-based
                 frameworks have been recently utilized for FPGA
                 programming, to enable the portability of a source code
                 to multiple architectures. However, such OpenCL
                 frameworks focus on RTL design, thus not enabling rapid
                 prototyping and abstracted modeling of complex systems.
                 Virtual Prototyping aims to overcome this problem by
                 enabling the system modeling in higher abstraction
                 levels.
This article combines the benefits of OpenCL and Virtual Prototyping, by proposing an OpenCL-based prototyping framework for data-parallel many-accelerator systems, which (a) creates a SystemC Virtual Platform from OpenCL, (b) provides a co-simulation environment for the host and the Virtual Platform, (c) offers memory and interconnection models for parallel data processing, and (d) enables the system evaluation with alternative real number representations (e.g., fixed-point or 16-bit floating-point).", acknowledgement = ack-nhfb, articleno = "86", fjournal = "ACM Transactions on Embedded Computing Systems", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J840", } @Article{Stpiczynski:2018:LBV, author = "Przemys{\l}aw Stpiczy{\'n}ski", title = "Language-based vectorization and parallelization using intrinsics, {OpenMP}, {TBB} and {Cilk Plus}", journal = j-J-SUPERCOMPUTING, volume = "74", number = "4", pages = "1461--1472", month = apr, year = "2018", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-017-2231-3", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Thu Oct 10 15:31:11 MDT 2019", bibsource = "http://link.springer.com/journal/11227/74/4; https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/content/pdf/10.1007/s11227-017-2231-3.pdf", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Article{Tagliavini:2018:UFG, author = "Giuseppe Tagliavini and Daniele Cesarini and Andrea Marongiu", title = "Unleashing Fine-Grained Parallelism on Embedded Many-Core Accelerators with Lightweight {OpenMP} Tasking", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "29", number = "9", pages = "2150--2163", month = sep, year = "2018", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/TPDS.2018.2814602", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", 
bibdate =      "Thu Aug 9 10:52:00 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://www.computer.org/csdl/trans/td/2018/09/08314096-abs.html",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed Systems",
  journal-URL =  "http://www.computer.org/tpds/archives.htm",
}

@Article{Tellez-Velazquez:2018:CSI,
  author =       "Arturo T{\'e}llez-Vel{\'a}zquez and Ra{\'u}l
                 Cruz-Barbosa",
  title =        "A {CUDA}-streams inference machine for non-singleton
                 fuzzy systems",
  journal =      j-CCPE,
  volume =       "30",
  number =       "8",
  pages =        "e4382:1--e4382:??",
  day =          "25",
  month =        apr,
  year =         "2018",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.4382",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Sat Aug 4 10:03:13 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://onlinelibrary.wiley.com/doi/abs/10.1002/cpe.4382",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
}

@Article{Tong:2018:FCM,
  author =       "Zhou Tong and Scott Pakin and Michael Lang and Xin
                 Yuan",
  title =        "Fast classification of {MPI} applications using
                 {Lamport}'s logical clocks",
  journal =      j-J-PAR-DIST-COMP,
  volume =       "120",
  number =       "??",
  pages =        "77--88",
  month =        oct,
  year =         "2018",
  CODEN =        "JPDCER",
  DOI =          "https://doi.org/10.1016/j.jpdc.2018.05.005",
  ISSN =         "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L =       "0743-7315",
  bibdate =      "Fri Aug 10 09:10:45 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S074373151830340X",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Parallel and Distributed Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/07437315",
}

@Article{Tracy:2018:CMC,
  author =       "Fred Thomas Tracy and Thomas C. Oppe and Maureen K.
                 Corcoran",
  title =        "A comparison of {MPI} and co-array {FORTRAN} for large
                 finite element variably saturated flow simulations",
  journal =      j-SCPE,
  volume =       "19",
  number =       "4",
  pages =        "423--432",
  month =        "????",
  year =         "2018",
  CODEN =        "????",
  ISSN =         "1895-1767",
  ISSN-L =       "1895-1767",
  bibdate =      "Mon Jan 7 06:46:51 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/fortran3.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/scpe.bib",
  URL =          "https://www.scpe.org/index.php/scpe/article/view/1468",
  acknowledgement = ack-nhfb,
  fjournal =     "Scalable Computing: Practice and Experience",
  journal-URL =  "http://www.scpe.org/",
}

@Article{Valero-Lara:2018:CCC,
  author =       "Pedro Valero-Lara and Ivan Mart{\'\i}nez-P{\'e}rez and
                 Ra{\"u}l Sirvent and Xavier Martorell and Antonio J.
                 Pe{\~n}a",
  title =        "{cuThomasBatch} and {cuThomasVBatch}, {CUDA} routines
                 to compute batch of tridiagonal systems on {NVIDIA
                 GPUs}",
  journal =      j-CCPE,
  volume =       "30",
  number =       "24",
  pages =        "e4909:1--e4909:??",
  day =          "25",
  month =        dec,
  year =         "2018",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.4909",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Thu Mar 28 08:07:53 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
  onlinedate =   "27 August 2018",
}

@Article{Villaverde:2018:PTI,
  author =       "Alejandro F. Villaverde and Kolja Becker and Julio R.
Banga",
  title =        "{PREMER}: a Tool to Infer Biological Networks",
  journal =      j-TCBB,
  volume =       "15",
  number =       "4",
  pages =        "1193--1202",
  month =        jul,
  year =         "2018",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2017.2758786",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Nov 8 06:18:45 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/fortran3.bib;
                 https://www.math.utah.edu/pub/tex/bib/matlab.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/python.bib;
                 https://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Inferring the structure of unknown cellular networks
                 is a main challenge in computational biology.
                 Data-driven approaches based on information theory can
                 determine the existence of interactions among network
                 nodes automatically. However, the elucidation of
                 certain features---such as distinguishing between
                 direct and indirect interactions or determining the
                 direction of a causal link---requires estimating
                 information-theoretic quantities in a multidimensional
                 space. This can be a computationally demanding task,
                 which acts as a bottleneck for the application of
                 elaborate algorithms to large-scale network inference
                 problems. The computational cost of such calculations
                 can be alleviated by the use of compiled programs and
                 parallelization. To this end, we have developed PREMER
                 (Parallel Reverse Engineering with Mutual information
                 \& Entropy Reduction), a software toolbox that can run
                 in parallel and sequential environments. It uses
                 information theoretic criteria to recover network
                 topology and determine the strength and causality of
                 interactions, and allows incorporating prior knowledge,
                 imputing missing data, and correcting outliers. PREMER
                 is a free, open source software tool that does not
                 require any commercial software. Its core algorithms
                 are programmed in FORTRAN 90 and implement OpenMP
                 directives. It has user interfaces in Python and
                 MATLAB/Octave, and runs on Windows, Linux, and OSX
                 (https://sites.google.com/site/premertoolbox/).",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}

@Article{Wolfe:2018:MLS,
  author =       "Noah Wolfe and Misbah Mubarak and Christopher D.
                 Carothers and Robert B. Ross and Philip H. Carns",
  title =        "Modeling Large-Scale Slim Fly Networks Using Parallel
                 Discrete-Event Simulation",
  journal =      j-TOMACS,
  volume =       "28",
  number =       "4",
  pages =        "29:1--29:??",
  month =        oct,
  year =         "2018",
  CODEN =        "ATMCEZ",
  DOI =          "https://doi.org/10.1145/3203406",
  ISSN =         "1049-3301 (print), 1558-1195 (electronic)",
  ISSN-L =       "1049-3301",
  bibdate =      "Mon Feb 4 19:19:05 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/tomacs.bib",
  abstract =     "As supercomputers approach exascale performance, the
                 increased number of processors translates to an
                 increased demand on the underlying network
                 interconnect. The slim fly network topology, a new
                 low-diameter, low-latency, and low-cost interconnection
                 network, is gaining interest as one possible solution
                 for next-generation supercomputing interconnect
                 systems. In this article, we present a high-fidelity
                 slim fly packet-level model leveraging the Rensselaer
                 Optimistic Simulation System (ROSS) and Co-Design of
                 Exascale Storage (CODES) frameworks. We validate the
                 model with published work before scaling the network
                 size up to an unprecedented 1 million compute nodes and
                 confirming that the slim fly observes peak network
                 throughput at extreme scale. In addition to synthetic
                 workloads, we evaluate large-scale slim fly models with
                 real communication workloads from applications in the
                 Design Forward program with over 110,000 MPI processes.
                 We show strong scaling of the slim fly model on an
                 Intel cluster achieving a peak network packet transfer
                 rate of 2.3 million packets per second and processing
                 over 7 billion discrete events using 128 MPI tasks.
                 Enabled by the strong performance capabilities of the
                 model, we perform a detailed application trace and
                 routing protocol performance study. Through analysis of
                 metrics such as packet latency, hop count, and
                 congestion, we find that the slim fly network is able
                 to leverage simple minimal routing and achieve the same
                 performance as more complex adaptive routing for tested
                 DOE benchmark applications.",
  acknowledgement = ack-nhfb,
  articleno =    "29",
  fjournal =     "ACM Transactions on Modeling and Computer Simulation",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J781",
}

@Article{Wolfe:2018:ODM,
  author =       "Michael Wolfe and Seyong Lee and Jungwon Kim and
                 Xiaonan Tian and Rengan Xu and Barbara Chapman and
                 Sunita Chandrasekaran",
  title =        "The {OpenACC} data model: Preliminary study on its
                 major challenges and implementations",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "78",
  number =       "??",
  pages =        "15--27",
  month =        oct,
  year =         "2018",
  CODEN =        "PACOEJ",
  DOI =          "https://doi.org/10.1016/j.parco.2018.07.003",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Mon Jan 7 15:25:20 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819118302175",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}

@Article{Yamazaki:2018:SIL,
  author =       "Ichitaro Yamazaki and Jakub Kurzak and Panruo Wu and
                 Mawussi Zounon and Jack Dongarra",
  title =        "Symmetric Indefinite Linear Solver Using {OpenMP} Task
                 on Multicore Architectures",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "29",
  number =       "8",
  pages =        "1879--1892",
  month =        aug,
year =         "2018",
  CODEN =        "ITDSEO",
  DOI =          "https://doi.org/10.1109/TPDS.2018.2808964",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Wed Jul 25 09:07:14 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://www.computer.org/csdl/trans/td/2018/08/08301559-abs.html",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed Systems",
  journal-URL =  "http://www.computer.org/tpds/archives.htm",
}

@Article{Yviquel:2018:CPU,
  author =       "Herv{\'e} Yviquel and Lauro Cruz and Guido Araujo",
  title =        "Cluster Programming using the {OpenMP} Accelerator
                 Model",
  journal =      j-TACO,
  volume =       "15",
  number =       "3",
  pages =        "35:1--35:??",
  month =        oct,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3226112",
  ISSN =         "1544-3566 (print), 1544-3973 (electronic)",
  ISSN-L =       "1544-3566",
  bibdate =      "Tue Jan 8 17:19:59 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/taco.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3226112",
  abstract =     "Computation offloading is a programming model in which
                 program fragments (e.g., hot loops) are annotated so
                 that their execution is performed in dedicated hardware
                 or accelerator devices. Although offloading has been
                 extensively used to move computation to GPUs, through
                 directive-based annotation standards like OpenMP,
                 offloading computation to very large computer clusters
                 can become a complex and cumbersome task. It typically
                 requires mixing programming models (e.g., OpenMP and
                 MPI) and languages (e.g., C/C++ and Scala), dealing
                 with various access control mechanisms from different
                 cloud providers (e.g., AWS and Azure), and integrating
                 all this into a single application. This article
                 introduces computer cluster nodes as simple OpenMP
                 offloading devices that can be used either from a local
                 computer or from the cluster head-node. It proposes a
                 methodology that transforms OpenMP directives to Spark
                 runtime calls with fully integrated communication
                 management, in a way that a cluster appears to the
                 programmer as yet another accelerator device.
                 Experiments using LLVM 3.8, OpenMP 4.5 on well known
                 cloud infrastructures (Microsoft Azure and Amazon EC2)
                 show the viability of the proposed approach, enable a
                 thorough analysis of its performance, and make a
                 comparison with an MPI implementation. The results show
                 that although data transfers can impose overheads,
                 cloud offloading from a local machine can still achieve
                 promising speedups for larger granularity: up to 115$
                 \times $ in 256 cores for the 2MM benchmark using 1GB
                 sparse matrices. In addition, the parallel
                 implementation of a complex and relevant scientific
                 application reveals an 80$ \times $ speedup on a 320
                 core machine when executed directly from the headnode
                 of the cluster.",
  acknowledgement = ack-nhfb,
  articleno =    "35",
  fjournal =     "ACM Transactions on Architecture and Code Optimization
                 (TACO)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J924",
}

@Article{Zha:2018:LSM,
  author =       "Yue Zha and Jing Li",
  title =        "{Liquid Silicon-Monona}: a Reconfigurable
                 Memory-Oriented Computing Fabric with Scalable
                 Multi-Context Support",
  journal =      j-SIGPLAN,
  volume =       "53",
  number =       "2",
  pages =        "214--228",
  month =        feb,
  year =         "2018",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/3296957.3173167",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Wed Oct 16 14:12:56 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "With the recent trend of promoting Field-Programmable
                 Gate Arrays (FPGAs) to first-class citizens in
                 accelerating compute-intensive applications in
                 networking, cloud services and artificial intelligence,
                 FPGAs face two major challenges in sustaining
                 competitive advantages in performance and energy
                 efficiency for diverse cloud workloads: (1) limited
                 configuration capability for supporting light-weight
                 computations/on-chip data storage to accelerate
                 emerging search-/data-intensive applications. (2) lack
                 of architectural support to hide reconfiguration
                 overhead for assisting virtualization in a cloud
                 computing environment. In this paper, we propose a
                 reconfigurable memory-oriented computing fabric, namely
                 Liquid Silicon-Monona (L-Si), enabled by emerging
                 nonvolatile memory technology i.e. RRAM, to address
                 these two challenges. Specifically, L-Si addresses the
                 first challenge by virtue of a new architecture
                 comprising a 2D array of physically identical but
                 functionally-configurable building blocks. It, for the
                 first time, extends the configuration capabilities of
                 existing FPGAs from computation to the whole spectrum
                 ranging from computation to data storage. It allows
                 users to better customize hardware by flexibly
                 partitioning hardware resources between computation and
                 memory, greatly benefiting emerging search- and
                 data-intensive applications. To address the second
                 challenge, L-Si provides scalable multi-context
                 architectural support to minimize reconfiguration
                 overhead for assisting virtualization. In addition, we
                 provide compiler support to facilitate the programming
                 of applications written in high-level programming
                 languages (e.g. OpenCL) and frameworks (e.g.
                 TensorFlow, MapReduce) while fully exploiting the
                 unique architectural capability of L-Si. Our evaluation
                 results show L-Si achieves 99.6\% area reduction,
                 1.43$ \times $ throughput improvement and 94.0\% power
                 reduction on search-intensive benchmarks, as compared
                 with the FPGA baseline. For neural network benchmarks,
                 on average, L-Si achieves 52.3$ \times $ speedup,
                 113.9$ \times $ energy reduction and 81\% area
                 reduction over the FPGA baseline. In addition, the
                 multi-context architecture of L-Si reduces the context
                 switching time to $ \sim $10ns, compared with an
                 off-the-shelf FPGA ($ \sim $100ms), greatly
                 facilitating virtualization.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "ASPLOS '18 proceedings.",
}

@Article{Zhang:2018:IRP,
  author =       "Xuechen Zhang and Song Jiang and Alseny Diallo and Lei
                 Wang",
  title =        "{IR+}: Removing parallel {I/O} interference of {MPI}
                 programs via data replication over heterogeneous
                 storage devices",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "76",
  number =       "??",
  pages =        "91--105",
  month =        aug,
  year =         "2018",
  CODEN =        "PACOEJ",
  DOI =          "https://doi.org/10.1016/j.parco.2018.01.004",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Mon Jun 4 07:40:18 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819118300140",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}

@Article{Adam:2019:CRA,
  author =       "Julien Adam and Maxime Kermarquer and Jean-Baptiste
                 Besnard and Leonardo Bautista-Gomez and Marc
                 P{\'e}rache and Patrick Carribault and Julien Jaeger
                 and Allen D.
Malony and Sameer Shende",
  title =        "Checkpoint\slash restart approaches for a thread-based
                 {MPI} runtime",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "85",
  number =       "??",
  pages =        "204--219",
  month =        jul,
  year =         "2019",
  CODEN =        "PACOEJ",
  DOI =          "https://doi.org/10.1016/j.parco.2019.02.006",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Mon Oct 14 16:20:01 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819118303247",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}

@Article{Al-Shorman:2019:UPP,
  author =       "Mohammad Y. Al-Shorman and Majd M. Al-Kofahi",
  title =        "Ultrasonic pulse propagation simulation using
                 {OpenCL} for environment mapping and discovery",
  journal =      j-IJHPCA,
  volume =       "33",
  number =       "5",
  pages =        "1019--1029",
  day =          "1",
  month =        sep,
  year =         "2019",
  CODEN =        "IHPCFL",
  DOI =          "https://doi.org/10.1177/1094342019846290",
  ISSN =         "1094-3420 (print), 1741-2846 (electronic)",
  ISSN-L =       "1094-3420",
  bibdate =      "Wed Oct 9 14:35:54 MDT 2019",
  bibsource =    "http://hpc.sagepub.com/;
                 https://www.math.utah.edu/pub/tex/bib/ijsa.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://journals.sagepub.com/doi/full/10.1177/1094342019846290",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of High Performance Computing
                 Applications",
  journal-URL =  "https://journals.sagepub.com/home/hpc",
}

@Article{Awan:2019:OLM,
  author =       "Ammar Ahmad Awan and Karthik Vadambacheri Manian and
                 Ching-Hsiang Chu and Hari Subramoni and Dhabaleswar K.
                 Panda",
  title =        "Optimized large-message broadcast for deep learning
                 workloads: {MPI}, {MPI + NCCL}, or {NCCL2}?",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "85",
  number =       "??",
  pages =        "141--152",
  month =        jul,
  year =         "2019",
  CODEN =        "PACOEJ",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Mon Oct 14 16:20:01 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819118303284",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}

@Article{Badia:2019:ASP,
  author =       "Jose M. Bad{\'{\i}}a and Jose A. Belloch and Maximo
                 Cobos and Francisco D. Igual and Enrique S.
                 Quintana-Ort{\'{\i}}",
  title =        "Accelerating the {SRP--PHAT} algorithm on multi- and
                 many-core platforms using {OpenCL}",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "75",
  number =       "3",
  pages =        "1284--1297",
  month =        mar,
  year =         "2019",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-018-2422-6",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Thu Oct 10 15:31:17 MDT 2019",
  bibsource =    "http://link.springer.com/journal/11227/75/3;
                 https://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
}

@Article{Balaji:2019:SIM,
  author =       "Pavan Balaji and Marc Casas",
  title =        "Special issue on the Message Passing Interface",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "86",
  number =       "??",
  pages =        "14--15",
  month =        aug,
  year =         "2019",
  CODEN =        "PACOEJ",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Mon Oct 14 16:20:01 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S016781911930095X",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}

@Article{Boschetti:2019:MOD,
  author =       "Marco Antonio Boschetti and Vittorio Maniezzo and
                 Francesco Strappaveccia",
  title =        "Membership overlay design optimization with resource
                 constraints (accelerated on {GPU})",
  journal =      j-J-PAR-DIST-COMP,
  volume =       "133",
  number =       "??",
  pages =        "286--296",
  month =        nov,
  year =         "2019",
  CODEN =        "JPDCER",
  ISSN =         "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L =       "0743-7315",
  bibdate =      "Fri Sep 13 10:25:21 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0743731518304908",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Parallel and Distributed Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/07437315",
}

@Article{Brown:2019:LMR,
  author =       "Nick Brown and Michael Bareford and Mich{\`e}le
                 Weiland",
  title =        "Leveraging {MPI} {RMA} to optimize halo-swapping
                 communications in {MONC} on {Cray} machines",
  journal =      j-CCPE,
  volume =       "31",
  number =       "16",
  pages =        "e5008:1--e5008:??",
  day =          "25",
  month =        aug,
  year =         "2019",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.5008",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Sat Oct 12 11:00:04 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/super.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
  onlinedate =   "25 September 2018",
}

@Article{Budiardja:2019:TGO,
  author =       "Reuben D. Budiardja and Christian Y. Cardall",
  title =        "Targeting {GPUs} with {OpenMP} directives on {Summit}:
                 a simple and effective {Fortran} experience",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "88",
  number =       "??",
  pages =        "Article 102544",
  month =        "????",
  year =         "2019",
  CODEN =        "PACOEJ",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Mon Oct 14 16:20:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/fortran3.bib;
                 https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819119301358",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}

@Article{Cadenelli:2019:CUO,
  author =       "Nicola Cadenelli and Zoran Jak{\v{s}}i{\'c} and
                 Jord{\`a} Polo and David Carrera",
  title =        "Considerations in using {OpenCL} on {GPUs} and {FPGAs}
                 for throughput-oriented genomics workloads",
  journal =      j-FUT-GEN-COMP-SYS,
  volume =       "94",
  number =       "??",
  pages =        "148--159",
  month =        may,
  year =         "2019",
  CODEN =        "FGSEVI",
  ISSN =         "0167-739X (print), 1872-7115 (electronic)",
  ISSN-L =       "0167-739X",
  bibdate =      "Mon Oct 14 16:09:56 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/futgencompsys.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167739X18314183",
  acknowledgement = ack-nhfb,
  fjournal =     "Future Generation Computer Systems",
  journal-URL =  "http://www.sciencedirect.com/science/journal/0167739X",
}

@Article{Candel:2019:EMC,
  author =       "F. Candel and A. Valero and S. Petit and J.
Sahuquillo",
  title =        "Efficient Management of Cache Accesses to Boost
                 {GPGPU} Memory Subsystem Performance",
  journal =      j-IEEE-TRANS-COMPUT,
  volume =       "68",
  number =       "10",
  pages =        "1442--1454",
  month =        oct,
  year =         "2019",
  CODEN =        "ITCOB4",
  DOI =          "https://doi.org/10.1109/TC.2019.2907591",
  ISSN =         "0018-9340 (print), 1557-9956 (electronic)",
  ISSN-L =       "0018-9340",
  bibdate =      "Thu Sep 12 13:33:25 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/ieeetranscomput2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Computers",
  journal-URL =  "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=12",
  keywords =     "Analytical models; Energy consumption; GPU; Graphics
                 processing units; Instruction sets; memory hierarchy;
                 Memory management; miss management; Proposals",
}

@Article{Chen:2019:STG,
  author =       "Yong Chen and Weijia Shang",
  title =        "Supernode transformation on {GPGPUs}",
  journal =      j-INT-J-PAR-EMER-DIST-SYS,
  volume =       "34",
  number =       "2",
  pages =        "181--202",
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1080/17445760.2017.1296147",
  ISSN =         "1744-5760 (print), 1744-5779 (electronic)",
  ISSN-L =       "1744-5760",
  bibdate =      "Tue Sep 10 15:30:02 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/intjparemerdistsys.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.tandfonline.com/toc/gpaa20/34/2",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of Parallel, Emergent and
                 Distributed Systems: IJPEDS",
  journal-URL =  "http://www.tandfonline.com/loi/gpaa20",
  onlinedate =   "06 Apr 2018",
}

@Article{Chikin:2019:MAA,
  author =       "Artem Chikin and Taylor Lloyd and Jos{\'e} Nelson
                 Amaral and Ettore Tiotto and Muhammad Usman",
  title =        "Memory-access-aware Safety and Profitability Analysis
                 for Transformation of Accelerator-bound {OpenMP}
                 Loops",
  journal =      j-TACO,
  volume =       "16",
  number =       "3",
  pages =        "30:1--30:??",
  month =        jul,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3333060",
  ISSN =         "1544-3566 (print), 1544-3973 (electronic)",
  ISSN-L =       "1544-3566",
  bibdate =      "Fri Jul 26 14:25:54 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/taco.bib",
  abstract =     "Iteration Point Difference Analysis is a new static
                 analysis framework that can be used to determine the
                 memory coalescing characteristics of parallel loops
                 that target GPU offloading and to ascertain safety and
                 profitability of loop transformations with the goal of
                 improving their memory access characteristics. This
                 analysis can propagate definitions through control
                 flow, works for non-affine expressions, and is capable
                 of analyzing expressions that reference conditionally
                 defined values. This analysis framework enables safe
                 and profitable loop transformations. Experimental
                 results demonstrate potential for dramatic performance
                 improvements. GPU kernel execution time across the
                 Polybench suite is improved by up to $ 25.5 \times $ on
                 an Nvidia P100 with benchmark overall improvement of up
                 to $ 3.2 \times $. An opportunity detected in a SPEC
                 ACCEL benchmark yields kernel speedup of $ 86.5 \times
                 $ with a benchmark improvement of $ 3.3 \times $. This
                 work also demonstrates how architecture-aware compilers
                 improve code portability and reduce programmer
                 effort.",
  acknowledgement = ack-nhfb,
  articleno =    "30",
  fjournal =     "ACM Transactions on Architecture and Code Optimization
                 (TACO)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J924",
}

@Article{Ciglaric:2019:OLP,
  author =       "Tadej Ciglari{\v{c}} and Rok {\v{C}}e{\v{s}}novar and
                 Erik {\v{S}}trumbelj",
  title =        "An {OpenCL} library for parallel random number
                 generators",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "75",
  number =       "7",
  pages =        "3866--3881",
  month =        jul,
  year =         "2019",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-019-02756-2",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Thu Oct 10 15:31:20 MDT 2019",
  bibsource =    "http://link.springer.com/journal/11227/75/7;
                 https://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                 https://www.math.utah.edu/pub/tex/bib/prng.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
}

@Article{Clauser:2019:FFO,
  author =       "C. F. Clauser and R. Farengo and H. E. Ferrari",
  title =        "{FOCUS}: a full-orbit {CUDA} solver for particle
                 simulations in magnetized plasmas",
  journal =      j-COMP-PHYS-COMM,
  volume =       "234",
  number =       "??",
  pages =        "126--136",
  month =        jan,
  year =         "2019",
  CODEN =        "CPHCBZ",
  DOI =          "https://doi.org/10.1016/j.cpc.2018.07.018",
  ISSN =         "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L =       "0010-4655",
  bibdate =      "Tue Oct 16 18:11:50 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0010465518302753",
  acknowledgement = ack-nhfb,
  fjournal =     "Computer Physics Communications",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00104655",
}

@Article{Coronado-Barrientos:2019:ANF,
  author =       "E. Coronado-Barrientos and G.
Indalecio and A. Garc{\'\i}a-Loureiro",
  title =        "{AXC}: a new format to perform the {SpMV} oriented to
                 {Intel Xeon Phi} architecture in {OpenCL}",
  journal =      j-CCPE,
  volume =       "31",
  number =       "1",
  pages =        "e4864:1--e4864:??",
  day =          "10",
  month =        jan,
  year =         "2019",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.4864",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Thu Mar 28 08:07:54 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
  onlinedate =   "31 July 2018",
}

@Article{Crivellini:2019:OPS,
  author =       "Andrea Crivellini and Matteo Franciolini",
  title =        "{OpenMP} Parallelization Strategies for a
                 Discontinuous {Galerkin} Solver",
  journal =      j-INT-J-PARALLEL-PROG,
  volume =       "47",
  number =       "5--6",
  pages =        "838--873",
  month =        dec,
  year =         "2019",
  CODEN =        "IJPPE5",
  DOI =          "https://doi.org/10.1007/s10766-018-0589-3",
  ISSN =         "0885-7458 (print), 1573-7640 (electronic)",
  ISSN-L =       "0885-7458",
  bibdate =      "Sat Jul 25 06:58:52 MDT 2020",
  bibsource =    "http://link.springer.com/journal/10766/47/5;
                 https://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://link.springer.com/article/10.1007/s10766-018-0589-3",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of Parallel Programming",
  journal-URL =  "http://link.springer.com/journal/10766",
}

@Article{Daberdaku:2019:ACT,
  author =       "Sebastian Daberdaku",
  title =        "Accelerating the computation of triangulated molecular
                 surfaces with {OpenMP}",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "75",
  number =       "7",
  pages =        "3426--3470",
  month =        jul,
  year =         "2019",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-019-02803-y",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Thu Oct 10 15:31:20 MDT 2019",
  bibsource =    "http://link.springer.com/journal/11227/75/7;
                 https://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
}

@Article{Dalcin:2019:FPM,
  author =       "Lisandro Dalcin and Mikael Mortensen and David E.
                 Keyes",
  title =        "Fast parallel multidimensional {FFT} using advanced
                 {MPI}",
  journal =      j-J-PAR-DIST-COMP,
  volume =       "128",
  number =       "??",
  pages =        "137--150",
  month =        jun,
  year =         "2019",
  CODEN =        "JPDCER",
  DOI =          "https://doi.org/10.1016/j.jpdc.2019.02.006",
  ISSN =         "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L =       "0743-7315",
  bibdate =      "Mon May 20 18:06:40 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S074373151830306X",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Parallel and Distributed Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/07437315",
}

@Article{Deng:2019:CBV,
  author =       "Y. Deng and T. Li and Y. Luo and X. Zhao",
  title =        "{CUDA}-Based Volume Rendering and Inspection for
                 Time-Varying Ultrasonic Testing Datasets",
  journal =      j-COMPUT-SCI-ENG,
  volume =       "21",
  number =       "5",
  pages =        "76--86",
  month =        sep # "\slash " # oct,
  year =         "2019",
  CODEN =        "CSENFA",
  DOI =          "https://doi.org/10.1109/MCSE.2018.2875319",
  ISSN =         "1521-9615 (print), 1558-366X (electronic)",
  ISSN-L =       "1521-9615",
  bibdate =      "Mon Aug 19 06:40:58 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/computscieng.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note =         "See corrections \cite{Deng:2020:CCB}.",
  acknowledgement = ack-nhfb,
  fjournal =     "Computing in Science and Engineering",
  journal-URL =  "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5992",
  keywords =     "Acoustics; Data visualization; Image color analysis;
                 Real-time systems; Rendering (computer graphics);
                 Three-dimensional displays; Transfer functions",
}

@Article{Denis:2019:SPT,
  author =       "Alexandre Denis and Julien Jaeger and Emmanuel Jeannot
                 and Marc P{\'e}rache and Hugo Taboada",
  title =        "Study on progress threads placement and dedicated
                 cores for overlapping {MPI} nonblocking collectives on
                 manycore processor",
  journal =      j-IJHPCA,
  volume =       "33",
  number =       "6",
  pages =        "1240--1254",
  day =          "1",
  month =        nov,
  year =         "2019",
  CODEN =        "IHPCFL",
  DOI =          "https://doi.org/10.1177/1094342019860184",
  ISSN =         "1094-3420 (print), 1741-2846 (electronic)",
  ISSN-L =       "1094-3420",
  bibdate =      "Wed Oct 9 14:35:54 MDT 2019",
  bibsource =    "http://hpc.sagepub.com/;
                 https://www.math.utah.edu/pub/tex/bib/ijsa.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://journals.sagepub.com/doi/full/10.1177/1094342019860184",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of High Performance Computing
                 Applications",
  journal-URL =  "https://journals.sagepub.com/home/hpc",
}

@Article{Deveci:2019:GMT,
  author =       "M. Deveci and K. D. Devine and K. Pedretti and M. A.
                 Taylor and S. Rajamanickam and {\"U}. V.
{\c{C}}ataly{\"u}rek", title = "Geometric Mapping of Tasks to Processors on Parallel Computers with Mesh or Torus Networks", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "30", number = "9", pages = "2018--2032", month = sep, year = "2019", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/TPDS.2019.2900043", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Fri Aug 30 06:09:58 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "IEEE Transactions on Parallel and Distributed Systems", journal-URL = "http://www.computer.org/portal/web/csdl/transactions/tpds", keywords = "algorithmic optimizations; application program interfaces; Bandwidth; communication interdependence; contiguous allocation; contiguous block; Cray XK7; E3SM/HOMME; finite difference methods; geometric mapping; geometric partitioning; geometric partitioning algorithm; geometric proximity; IBM BlueGene/Q; jagged partitioning; load balancing; Measurement; mesh networks; message passing; MiniGhost default mapping; MPI tasks; multiprocessing systems; Network topology; noncontiguous allocations; optimisation; parallel applications; parallel computers; parallel machines; Partitioning algorithms; processors; Program processors; recursive bisection; resource allocation; Resource management; sparse allocation; sparse node allocation; spatial partitioning; structured finite difference mini-application; Task analysis; Task mapping; torus networks", } @Article{Diaz:2019:AOO, author = "Jose Monsalve Diaz and Kyle Friedline and Swaroop Pophale and Oscar Hernandez and David E. 
Bernholdt and Sunita Chandrasekaran", title = "Analysis of {OpenMP 4.5} Offloading in Implementations: Correctness and Overhead", journal = j-PARALLEL-COMPUTING, volume = "89", number = "??", pages = "Article 102546", month = nov, year = "2019", CODEN = "PACOEJ", DOI = "https://doi.org/10.1016/j.parco.2019.102546", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Mon Mar 29 11:35:58 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819119301371", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Dieguez:2019:TPR, author = "Adri{\'a}n P. Di{\'e}guez and Margarita Amor and Ram{\'o}n Doallo", title = "Tree Partitioning Reduction: A New Parallel Partition Method for Solving Tridiagonal Systems", journal = j-TOMS, volume = "45", number = "3", pages = "31:1--31:26", month = aug, year = "2019", CODEN = "ACMSCU", DOI = "https://doi.org/10.1145/3328731", ISSN = "0098-3500 (print), 1557-7295 (electronic)", ISSN-L = "0098-3500", bibdate = "Tue Sep 3 17:49:22 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/toms.bib", URL = "https://dl.acm.org/citation.cfm?id=3328731", abstract = "Solving tridiagonal linear-equation systems is a fundamental computing kernel in a wide range of scientific and engineering applications, and its computation can be modeled with parallel algorithms. These parallel solvers are typically designed to compute problems whose data fit in a common shared-memory space where all the cores taking part in the computation have access. However, when the problem size is large, data cannot be entirely stored in the common shared-memory space, and a high number of high-latency communications are performed. 
One alternative is to partition the problem among different memory spaces. At this point, conventional parallel algorithms do not facilitate the partition of computation in independent tiles, since each reduction depends on equations that may be in different tiles. This article proposes an algorithm based on a tree reduction, called the Tree Partitioning Reduction (TPR) method, which partitions the problem into independent slices that can be partially computed in parallel within different common shared-memory spaces. The TPR method can be implemented for any parallel and distributed programming paradigm. Furthermore, in this work, TPR is efficiently implemented for CUDA GPUs to solve large size problems, providing highly competitive performance results with respect to existing packages, being, on average, 22.03$ \times $ faster than CUSPARSE.", acknowledgement = ack-nhfb, articleno = "31", fjournal = "ACM Transactions on Mathematical Software (TOMS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J782", } @Article{Diep:2019:TSS, author = "Thanh-Dang Diep and Kien Trung Pham and Karl F{\"u}rlinger and Nam Thoai", title = "A time-stamping system to detect memory consistency errors in {MPI} one-sided applications", journal = j-PARALLEL-COMPUTING, volume = "86", number = "??", pages = "36--44", month = aug, year = "2019", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Mon Oct 14 16:20:01 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819118303235", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Dongarra:2019:PPL, author = "Jack Dongarra and Mark Gates and Azzam Haidar and Jakub Kurzak and Piotr Luszczek and Panruo Wu and Ichitaro Yamazaki and Asim Yarkhan and Maksims Abalenkovs 
and Negin Bagherpour and Sven Hammarling and Jakub S{\'\i}stek and David Stevens and Mawussi Zounon and Samuel D. Relton", title = "{PLASMA}: Parallel Linear Algebra Software for Multicore Using {OpenMP}", journal = j-TOMS, volume = "45", number = "2", pages = "16:1--16:35", month = apr, year = "2019", CODEN = "ACMSCU", DOI = "https://doi.org/10.1145/3264491", ISSN = "0098-3500 (print), 1557-7295 (electronic)", ISSN-L = "0098-3500", bibdate = "Mon May 6 18:23:42 MDT 2019", bibsource = "https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/toms.bib", URL = "https://dl.acm.org/citation.cfm?id=3264491", abstract = "The recent version of the Parallel Linear Algebra Software for Multicore Architectures (PLASMA) library is based on tasks with dependencies from the OpenMP standard. The main functionality of the library is presented. Extensive benchmarks are targeted on three recent multicore and manycore architectures, namely, an Intel Xeon, Intel Xeon Phi, and IBM POWER 8 processors.", acknowledgement = ack-nhfb, articleno = "16", fjournal = "ACM Transactions on Mathematical Software (TOMS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J782", } @Article{Doulis:2019:CMP, author = "Georgios Doulis and J{\"o}rg Frauendiener and Chris Stevens and Ben Whale", title = "{COFFEE} --- an {MPI}-parallelized {Python} package for the numerical evolution of differential equations", journal = j-SOFTWAREX, volume = "10", number = "??", pages = "Article 100283", month = jul # "\slash " # dec, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1016/j.softx.2019.100283", ISSN = "2352-7110", ISSN-L = "2352-7110", bibdate = "Fri Apr 9 16:04:36 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/python.bib; https://www.math.utah.edu/pub/tex/bib/softwarex.bib", URL = 
"http://www.sciencedirect.com/science/article/pii/S2352711019300950", acknowledgement = ack-nhfb, fjournal = "SoftwareX", journal-URL = "https://www.sciencedirect.com/journal/softwarex/issues", } @Article{Edmonds:2019:HAS, author = "Mark Edmonds and Tanvir Atahary and Scott Douglass and Tarek Taha", title = "Hardware Accelerated Semantic Declarative Memory Systems through {CUDA} and {MapReduce}", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "30", number = "3", pages = "601--614", month = mar, year = "2019", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/TPDS.2018.2866848", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Thu Feb 14 06:19:14 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://www.computer.org/csdl/trans/td/2019/03/08444694-abs.html", acknowledgement = ack-nhfb, journal-URL = "http://www.computer.org/tpds/archives.htm", } @Article{Faict:2019:MGI, author = "Thomas Faict and Erik H. 
D'Hollander and Bart Goossens", title = "Mapping a Guided Image Filter on the {HARP} Reconfigurable Architecture Using {OpenCL}", journal = j-ALGORITHMS-BASEL, volume = "12", number = "8", month = aug, year = "2019", CODEN = "ALGOCH", DOI = "https://doi.org/10.3390/a12080149", ISSN = "1999-4893 (electronic)", ISSN-L = "1999-4893", bibdate = "Thu May 28 08:40:45 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/algorithms.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://www.mdpi.com/1999-4893/12/8/149", acknowledgement = ack-nhfb, articleno = "149", fjournal = "149", journal-URL = "https://www.mdpi.com/", pagecount = "??", } @Article{Fan:2019:BPA, author = "Xing Fan and Oliver Sinnen and Nasser Giacaman", title = "Balancing parallelization and asynchronization in event-driven programs with {OpenMP}", journal = j-CCPE, volume = "31", number = "4", pages = "e4959:1--e4959:??", day = "25", month = feb, year = "2019", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.4959", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Thu Mar 28 08:07:55 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "21 September 2018", } @Article{Fan:2019:SAO, author = "Xing Fan and Oliver Sinnen and Nasser Giacaman", title = "Supporting asynchronization in {OpenMP} for event-driven programming", journal = j-PARALLEL-COMPUTING, volume = "82", number = "??", pages = "57--74", month = "????", year = "2019", CODEN = "PACOEJ", DOI = "https://doi.org/10.1016/j.parco.2018.03.008", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Tue Mar 12 06:47:09 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; 
https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819118300838", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Figueiredo:2019:MOP, author = "Marco Antonio C. de {Figueiredo, Jr.} and Edans F. de Oliveira Sandes and Genaina N. Rodrigues and George L. M. Teodoro and Alba Cristina M. A. de Melo", title = "{MASA-OpenCL}: Parallel pruned comparison of long {DNA} sequences with {OpenCL}", journal = j-CCPE, volume = "31", number = "11", pages = "e5039:1--e5039:??", day = "10", month = jun, year = "2019", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.5039", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Sat Oct 12 11:00:02 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "21 October 2018", } @Article{Fujita:2019:EIM, author = "Hajime Fujita and Chongxiao Cao and Sayantan Sur and Charles Archer and Erik Paulson and Maria Garzaran", title = "Efficient implementation of {MPI-3} {RMA} over {openFabrics} interfaces", journal = j-PARALLEL-COMPUTING, volume = "87", number = "??", pages = "1--10", month = sep, year = "2019", CODEN = "PACOEJ", DOI = "https://doi.org/10.1016/j.parco.2019.04.008", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Mon Oct 14 16:20:02 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819118303843", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } 
@Article{Gittens:2019:AAS,
  author = "Alex Gittens and Kai Rothauge and Shusen Wang and
    Michael W. Mahoney and Jey Kottalam and Lisa Gerhardt and
    Prabhat and Michael Ringenburg and Kristyn Maschhoff",
  title = "{Alchemist}: an {Apache Spark} $ \leftrightarrow $ {MPI} interface",
  journal = j-CCPE,
  volume = "31",
  number = "16",
  pages = "e5026:1--e5026:??",
  day = "25",
  month = aug,
  year = "2019",
  CODEN = "CCPEBO",
  DOI = "https://doi.org/10.1002/cpe.5026",
  ISSN = "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L = "1532-0626",
  bibdate = "Sat Oct 12 11:00:04 MDT 2019",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib;
    https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Concurrency and Computation: Practice and Experience",
  journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626",
  onlinedate = "12 November 2018",
}

@Article{Gloster:2019:CBP,
  author = "Andrew Gloster and Lennon {{\'O} N{\'a}raigh} and Khang Ee Pang",
  title = "{cuPentBatch} --- a batched pentadiagonal solver for
    {NVIDIA} {GPUs}",
  journal = j-COMP-PHYS-COMM,
  volume = "241",
  number = "??",
  pages = "113--121",
  month = aug,
  year = "2019",
  CODEN = "CPHCBZ",
  DOI = "https://doi.org/10.1016/j.cpc.2019.03.016",
  ISSN = "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L = "0010-4655",
  bibdate = "Tue May 14 10:01:33 MDT 2019",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
    https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://www.sciencedirect.com/science/article/pii/S0010465519300979",
  acknowledgement = ack-nhfb,
  fjournal = "Computer Physics Communications",
  journal-URL = "http://www.sciencedirect.com/science/journal/00104655",
}

@Article{Gloster:2019:CCF,
  author = "Andrew Gloster and Lennon {{\'O} N{\'a}raigh}",
  title = "{cuSten} --- {CUDA} finite difference and stencil library",
  journal = j-SOFTWAREX,
  volume = "10",
  number = "??",
  pages = "Article 100337",
  month = jul # "\slash " # dec,
  year = "2019",
  CODEN = "????",
  DOI =
"https://doi.org/10.1016/j.softx.2019.100337", ISSN = "2352-7110", ISSN-L = "2352-7110", bibdate = "Fri Apr 9 16:04:36 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/softwarex.bib", URL = "http://www.sciencedirect.com/science/article/pii/S2352711019300561", acknowledgement = ack-nhfb, fjournal = "SoftwareX", journal-URL = "https://www.sciencedirect.com/journal/softwarex/issues", } @Article{Gropp:2019:GEI, author = "William Gropp and Rajeev Thakur", title = "{Guest Editor}'s introduction: Special issue on best papers from {EuroMPI\slash USA 2017}", journal = j-PARALLEL-COMPUTING, volume = "84", number = "??", pages = "62--62", month = may, year = "2019", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Mon Oct 14 16:20:01 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819119300560", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Gropp:2019:UNS, author = "William D. Gropp", title = "Using node and socket information to implement {MPI} {Cartesian} topologies", journal = j-PARALLEL-COMPUTING, volume = "85", number = "??", pages = "98--108", month = jul, year = "2019", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Mon Oct 14 16:20:01 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819118303156", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Gueunet:2019:TBA, author = "C. Gueunet and P. Fortin and J. Jomier and J. 
Tierny", title = "Task-Based Augmented Contour Trees with {Fibonacci} Heaps", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "30", number = "8", pages = "1889--1905", month = aug, year = "2019", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/TPDS.2019.2898436", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Fri Aug 30 06:09:58 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/fibquart.bib; https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "IEEE Transactions on Parallel and Distributed Systems", journal-URL = "http://www.computer.org/portal/web/csdl/transactions/tpds", keywords = "computation procedure; contour tree based applications; Data analysis; data segmentation applications; data structures; Data structures; data visualisation; Data visualization; fast shared memory; Fibonacci heaps; independent local tasks; intermediate data structures; join split trees; multi-core architecture; multi-threading; multicore computation; OpenMP task runtime; parallel algorithm; parallel algorithms; Parallel algorithms; parallel thanks; Runtime; Scientific visualization; Task analysis; task parallelism; task-based augmented contour trees; topological data analysis; tree algorithm; trees (mathematics)", } @Article{Hajihassani:2019:FAI, author = "O. Hajihassani and S. K. Monfared and S. H. Khasteh and S. 
Gorgin", title = "Fast {AES} Implementation: A High-Throughput Bitsliced Approach", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "30", number = "10", pages = "2211--2222", month = oct, year = "2019", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/TPDS.2019.2911278", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Thu Dec 19 09:20:35 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "IEEE Transactions on Parallel and Distributed Systems", journal-URL = "http://www.computer.org/portal/web/csdl/transactions/tpds", keywords = "AES; byte-wise operations; computing process; cryptography; CTR; CUDA; CUDA-enabled GPU; Data models; data representation; data representation scheme; ECB; Encryption; encryption throughput; fast AES implementation; GPU; Graphics processing units; high-performance; high-throughput bitsliced AES implementation; high-throughput bitsliced approach; logic circuits; parallel architectures; parallelization capability; parallelization unit; S-box logic circuit; ShiftRows; Standards; substitute bytes stage; Table lookup; Throughput", } @Article{Hermanns:2019:MEI, author = "Marc-Andr{\'e} Hermanns and Nathan T. 
Hjelm and Michael Knobloch and Kathryn Mohror and Martin Schulz", title = "The {MPI\_T} events interface: an early evaluation and overview of the interface", journal = j-PARALLEL-COMPUTING, volume = "85", number = "??", pages = "119--130", month = jul, year = "2019", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Mon Oct 14 16:20:01 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819118303314", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Holmes:2019:PPE, author = "Daniel J. Holmes and Bradley Morgan and Anthony Skjellum and Purushotham V. Bangalore and Srinivas Sridharan", title = "Planning for performance: Enhancing achievable performance for {MPI} through persistent collective operations", journal = j-PARALLEL-COMPUTING, volume = "81", number = "??", pages = "32--57", month = jan, year = "2019", CODEN = "PACOEJ", DOI = "https://doi.org/10.1016/j.parco.2018.08.001", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Mon Jan 7 15:25:21 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819118302412", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Huckelheim:2019:RMA, author = "Jan H{\"u}ckelheim and Paul Hovland and Michelle Mills Strout and Jens-Dominik M{\"u}ller", title = "Reverse-mode algorithmic differentiation of an {OpenMP}-parallel compressible flow solver", journal = j-IJHPCA, volume = "33", number = "1", pages = "140--154", day = "1", month = jan, year = "2019", CODEN = "IHPCFL", DOI = 
"https://doi.org/10.1177/1094342017712060", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Wed Oct 9 14:35:53 MDT 2019", bibsource = "http://hpc.sagepub.com/; https://www.math.utah.edu/pub/tex/bib/ijsa.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://journals.sagepub.com/doi/full/10.1177/1094342017712060", acknowledgement = ack-nhfb, fjournal = "International Journal of High Performance Computing Applications", journal-URL = "https://journals.sagepub.com/home/hpc", } @Article{Izadpanah:2019:PAP, author = "Ramin Izadpanah and Benjamin A. Allan and Damian Dechev and Jim Brandt", title = "Production Application Performance Data Streaming for System Monitoring", journal = j-TOMPECS, volume = "4", number = "2", pages = "8:1--8:??", month = jun, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3319498", ISSN = "2376-3639", bibdate = "Sat Sep 21 07:21:17 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/tompecs.bib", URL = "https://dl.acm.org/citation.cfm?id=3319498", abstract = "In this article, we present an approach to streaming collection of application performance data. Practical application performance tuning and troubleshooting in production high-performance computing (HPC) environments requires an understanding of how applications interact with the platform, including (but not limited to) parallel programming libraries such as Message Passing Interface (MPI). Several profiling and tracing tools exist that collect heavy runtime data traces either in memory (released only at application exit) or on a file system (imposing an I/O load that may interfere with the performance being measured). Although these approaches are beneficial in development stages and post-run analysis, a systemwide and low-overhead method is required to monitor deployed applications continuously. 
This method must be able to collect information at both the application and system levels to yield a complete performance picture. In our approach, an application profiler collects application event counters. A sampler uses an efficient inter-process communication method to periodically extract the application counters and stream them into an infrastructure for performance data collection. We implement a tool-set based on our approach and integrate it with the Lightweight Distributed Metric Service (LDMS) system, a monitoring system used on large-scale computational platforms. LDMS provides the infrastructure to create and gather streams of performance data in a low overhead manner. We demonstrate our approach using applications implemented with MPI, as it is one of the most common standards for the development of large-scale scientific applications. We utilize our tool-set to study the impact of our approach on an open source HPC application, Nalu. Our tool-set enables us to efficiently identify patterns in the behavior of the application without source-level knowledge. We leverage LDMS to collect system-level performance data and explore the correlation between the system and application events. Also, we demonstrate how our tool-set can help detect anomalies with a low latency. We run tests on two different architectures: a system enabled with Intel Xeon Phi and another system equipped with Intel Xeon processor. 
Our overhead study shows our method imposes at most 0.5\% CPU usage overhead on the application in realistic deployment scenarios.", acknowledgement = ack-nhfb, articleno = "8", fjournal = "ACM Transactions on Modeling and Performance Evaluation of Computing Systems (TOMPECS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J1525", } @Article{Kallenborn:2019:MPC, author = "Felix Kallenborn and Christian Hundt and Sebastian B{\"o}ser and Bertil Schmidt", title = "Massively parallel computation of atmospheric neutrino oscillations on {CUDA}-enabled accelerators", journal = j-COMP-PHYS-COMM, volume = "234", number = "??", pages = "235--244", month = jan, year = "2019", CODEN = "CPHCBZ", DOI = "https://doi.org/10.1016/j.cpc.2018.07.022", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Tue Oct 16 18:11:50 MDT 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465518302790", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Kang:2019:SAM, author = "Qiao Kang and Jesper Larsson Tr{\"a}ff and Reda Al-Bahrani and Ankit Agrawal and Alok Choudhary and Wei-keng Liao", title = "Scalable Algorithms for {MPI} Intergroup {Allgather} and {Allgatherv}", journal = j-PARALLEL-COMPUTING, volume = "85", number = "??", pages = "220--230", month = jul, year = "2019", CODEN = "PACOEJ", DOI = "https://doi.org/10.1016/j.parco.2019.04.015", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Mon Oct 14 16:20:01 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S016781911830320X", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL 
= "http://www.sciencedirect.com/science/journal/01678191", }

@Article{Knap:2019:PEU,
  author =       "Marcin Knap and Pawe{\l} Czarnul",
  title =        "Performance evaluation of Unified Memory with
                 prefetching and oversubscription for selected parallel
                 {CUDA} applications on {NVIDIA} {Pascal} and {Volta}
                 {GPUs}",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "75",
  number =       "11",
  pages =        "7625--7645",
  month =        nov,
  year =         "2019",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-019-02966-8",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Sat Jul 25 07:17:51 MDT 2020",
  bibsource =    "http://link.springer.com/journal/11227/75/11;
                 https://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/content/pdf/10.1007/s11227-019-02966-8.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
}

@Article{Knight:2019:TES,
  author =       "Louise Knight and Polona Stefanic and Matej Cigale and
                 Andrew C. Jones and Ian Taylor",
  title =        "Towards extending the {SWITCH} platform for
                 time-critical, cloud-based {CUDA} applications: Job
                 scheduling parameters influencing performance",
  journal =      j-FUT-GEN-COMP-SYS,
  volume =       "100",
  number =       "??",
  pages =        "542--556",
  month =        nov,
  year =         "2019",
  CODEN =        "FGSEVI",
  DOI =          "https://doi.org/10.1016/j.future.2019.05.039",
  ISSN =         "0167-739X (print), 1872-7115 (electronic)",
  ISSN-L =       "0167-739X",
  bibdate =      "Mon Feb 10 12:55:01 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/futgencompsys.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167739X18311014",
  acknowledgement = ack-nhfb,
  fjournal =     "Future Generation Computer Systems",
  journal-URL =  "http://www.sciencedirect.com/science/journal/0167739X",
}

@Article{Kronbichler:2019:FMF,
  author =       "Martin Kronbichler and Katharina Kormann",
  title =        "Fast Matrix-Free Evaluation of Discontinuous
                 {Galerkin} Finite Element Operators",
  journal =      j-TOMS,
  volume =       "45",
  number =       "3",
  pages =        "29:1--29:40",
  month =        aug,
  year =         "2019",
  CODEN =        "ACMSCU",
  DOI =          "https://doi.org/10.1145/3325864",
  ISSN =         "0098-3500 (print), 1557-7295 (electronic)",
  ISSN-L =       "0098-3500",
  bibdate =      "Tue Sep 3 17:49:22 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/toms.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3325864",
  abstract =     "We present an algorithmic framework for matrix-free
                 evaluation of discontinuous Galerkin finite element
                 operators. It relies on fast quadrature with sum
                 factorization on quadrilateral and hexahedral meshes,
                 targeting general weak forms of linear and nonlinear
                 partial differential equations. Different algorithms
                 and data structures are compared in an in-depth
                 performance analysis. The implementations of the local
                 integrals are optimized by vectorization over several
                 cells and faces and an even-odd decomposition of the
                 one-dimensional interpolations. Up to 60\% of the
                 arithmetic peak on Intel Haswell, Broadwell, and
                 Knights Landing processors is reached when running from
                 caches and up to 40\% of peak when also considering the
                 access to vectors from main memory. On 2$ \times $14
                 Broadwell cores, the throughput is up to 2.2 billion
                 unknowns per second for the 3D Laplacian and up to 4
                 billion unknowns per second for the 3D advection on
                 affine geometries, close to a simple copy operation at
                 4.7 billion unknowns per second. Our experiments show
                 that MPI ghost exchange has a considerable impact on
                 performance and we present strategies to mitigate this
                 effect. Finally, various options for evaluating
                 geometry terms and their performance are discussed. Our
                 implementations are publicly available through the
                 deal.II finite element library.",
  acknowledgement = ack-nhfb,
  articleno =    "29",
  fjournal =     "ACM Transactions on Mathematical Software (TOMS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J782",
}

@Article{Kumar:2019:FOP,
  author =       "Ramavarmaraja Kishor Kumar and Vladimir Loncar and
                 Paulsamy Muruganandam and Sadhan K. Adhikari and Antun
                 Balaz",
  title =        "{C} and {Fortran} {OpenMP} programs for rotating
                 {Bose--Einstein} condensates",
  journal =      j-COMP-PHYS-COMM,
  volume =       "240",
  number =       "??",
  pages =        "74--82",
  month =        jul,
  year =         "2019",
  CODEN =        "CPHCBZ",
  DOI =          "https://doi.org/10.1016/j.cpc.2019.03.004",
  ISSN =         "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L =       "0010-4655",
  bibdate =      "Fri Jun 14 08:12:51 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/fortran3.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0010465519300827",
  acknowledgement = ack-nhfb,
  fjournal =     "Computer Physics Communications",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00104655",
}

@TechReport{Laguna:2019:GPD,
  author =       "Ignacio Laguna and Paul C. Wood and Ranvijay Singh and
                 Saurabh Bagchi",
  title =        "{GPUMixer}: Performance-Driven Floating-Point Tuning
                 for {GPU} Scientific Applications",
  type =         "Report",
  institution =  "Lawrence Livermore National Laboratory",
  address =      "Livermore CA 94550, USA",
  year =         "2019",
  bibdate =      "Tue Aug 06 05:54:23 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/fparith.bib;
                 https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://lagunaresearch.org/docs/isc-2019.pdf;
                 https://www.hpcwire.com/2019/08/05/llnl-purdue-researchers-harness-gpu-mixed-precision-for-accuracy-performance-tradeoff/",
  abstract =     "We present GPUMixer, a tool to perform mixed-precision
                 floating-point tuning on scientific GPU applications.
                 While precision tuning techniques are available, they
                 are designed for serial programs and are
                 accuracy-driven, i.e., they consider configurations
                 that satisfy accuracy constraints, but these
                 configurations may degrade performance. GPUMixer, in
                 contrast, presents a performance-driven approach for
                 tuning. We introduce a novel static analysis that finds
                 Fast Imprecise Sets (FISets), sets of operations on low
                 precision that minimize type conversions, which often
                 yield performance speedups. To estimate the relative
                 error introduced by GPU mixed-precision, we propose
                 shadow computations analysis for GPUs, the first of
                 this class for multi-threaded applications. GPUMixer
                 obtains performance improvements of up to 46.4\% of the
                 ideal speedup in comparison to only 20.7\% found by
                 state-of-the-art methods.",
  acknowledgement = ack-nhfb,
  remark =       "Best paper award at the 33rd ISC High Performance
                 conference held June 16--20, 2019.",
}

@Article{Levy:2019:USE,
  author =       "Scott Levy and Kurt B. Ferreira and Whit Schonbein and
                 Ryan E. Grant and Matthew G. F. Dosanjh",
  title =        "Using simulation to examine the effect of {MPI}
                 message matching costs on application performance",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "84",
  number =       "??",
  pages =        "63--74",
  month =        may,
  year =         "2019",
  CODEN =        "PACOEJ",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Mon Oct 14 16:20:01 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819118303272",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}

@Article{Li:2019:TBH,
  author =       "Bing Li and Mengjie Mao and Xiaoxiao Liu and Tao Liu
                 and Zihao Liu and Wujie Wen and Yiran Chen and Hai
                 (Helen) Li",
  title =        "Thread Batching for High-performance Energy-efficient
                 {GPU} Memory Design",
  journal =      j-JETC,
  volume =       "15",
  number =       "4",
  pages =        "39:1--39:??",
  month =        dec,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3330152",
  ISSN =         "1550-4832",
  bibdate =      "Tue Dec 17 07:50:24 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib;
                 https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3330152",
  abstract =     "Massive multi-threading in GPU imposes tremendous
                 pressure on memory subsystems. Due to rapid growth in
                 thread-level parallelism of GPU and slowly improved
                 peak memory bandwidth, memory becomes a bottleneck of
                 GPU's performance and energy efficiency. In this
                 article, we propose an integrated architectural scheme
                 to optimize the memory accesses and therefore boost the
                 performance and energy efficiency of GPU. First, we
                 propose a thread batch enabled memory partitioning
                 (TEMP) to improve GPU memory access parallelism. In
                 particular, TEMP groups multiple thread blocks that
                 share the same set of pages into a thread batch and
                 applies a page coloring mechanism to bound each stream
                 multiprocessor (SM) to the dedicated memory banks.
                 After that, TEMP dispatches the thread batch to an SM
                 to ensure high-parallel memory-access streaming from
                 the different thread blocks. Second, a thread
                 batch-aware scheduling (TBAS) scheme is introduced to
                 improve the GPU memory access locality and to reduce
                 the contention on memory controllers and
                 interconnection networks. Experimental results show
                 that the integration of TEMP and TBAS can achieve up to
                 10.3\% performance improvement and 11.3\% DRAM energy
                 reduction across diverse GPU applications. We also
                 evaluate the performance interference of the mixed
                 CPU+GPU workloads when they are run on a heterogeneous
                 system that employs our proposed schemes. Our results
                 show that a simple solution can effectively ensure the
                 efficient execution of both GPU and CPU applications.",
  acknowledgement = ack-nhfb,
  articleno =    "39",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J967",
}

@Article{Lima:2019:PEA, author = "Jo{\~a}o Vicente Ferreira Lima and Issam Ra{\"\i}s and Laurent Lef{\`e}vre and Thierry Gautier", title = "Performance and energy analysis of {OpenMP} runtime systems with dense linear algebra algorithms", journal = j-IJHPCA, volume = "33", number = "3", pages = "431--443", day = "1", month = may, year = "2019", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342018792079", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Wed Oct 9 14:35:53 MDT 2019", bibsource = "http://hpc.sagepub.com/; https://www.math.utah.edu/pub/tex/bib/ijsa.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://journals.sagepub.com/doi/full/10.1177/1094342018792079", acknowledgement = ack-nhfb, fjournal = "International Journal of High
Performance Computing Applications", journal-URL = "https://journals.sagepub.com/home/hpc", }

@Article{Liu:2019:MML,
  author =       "Qixiao Liu and Zhifeng Chen and Zhibin Yu",
  title =        "{MiC}: Multi-level Characterization and Optimization
                 of {GPGPU} Kernels",
  journal =      j-JETC,
  volume =       "15",
  number =       "3",
  pages =        "25:1--25:??",
  month =        jun,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3304108",
  ISSN =         "1550-4832",
  bibdate =      "Fri Nov 29 16:06:01 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jetc.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3304108",
  abstract =     "Graphics processing units (GPUs) have enjoyed
                 increasing popularity in recent years, which benefits
                 from, for example, general-purpose GPU (GPGPU) for
                 parallel programs and new computing paradigms, such as
                 the Internet of Things (IoT). GPUs hold great potential
                 in providing effective solutions for big data analytics
                 while the demands for processing large quantities of
                 data in real time are also increasing. However, the
                 pervasive presence of GPUs on mobile devices presents
                 great challenges for GPGPU, mainly because GPGPU
                 integrates a large amount of processor arrays and
                 concurrent executing threads (up to hundreds of
                 thousands). In particular, the root causes of
                 performance loss in a GPGPU program can not be revealed
                 in detail by current approaches. In this article, we
                 propose MiC (Multi-level Characterization), a framework
                 that comprehensively characterizes GPGPU kernels at the
                 instruction, Basic Block (BBL), and thread levels.
                 Specifically, we devise Instruction Vectors (IV) and
                 Basic Blocks Vectors (BBV), a Thread Similarity Matrix
                 (TSM), and a Divergence Flow Statistics Graph (DFSG) to
                 profile information in each level. We use MiC to
                 provide insights into GPGPU kernels through the
                 characterizations of 34 kernels from popular GPGPU
                 benchmark suites such as Compute Unified Device
                 Architecture (CUDA) Software Development Kit (SDK),
                 Rodinia, and Parboil. In comparison with Central
                 Processing Unit (CPU) workloads, we conclude the key
                 findings as follows: (1) There are comparable
                 Instruction-Level Parallelism (ILP); (2) The BBL count
                 is significantly smaller than CPU workloads---only 22.8
                 on average; (3) The dynamic instruction count per
                 thread varies from dozens to tens of thousands and it
                 is extremely small compared to CPU benchmarks; (4) The
                 Pareto principle (also called 90/10 rule) does not
                 apply to GPGPU kernels while it pervasively exists in
                 CPU programs; (5) The loop patterns are dramatically
                 different from those in CPU workloads; (6) The branch
                 ratio is lower than that of CPU programs but higher
                 than pure GPU workloads. In addition, we have also
                 shown how TSM and DFSG are used to characterize the
                 branch divergence in a visual way, to enable the
                 analysis of thread behavior in GPGPU programs. In
                 addition, we show an optimization case for a GPGPU
                 kernel from the bottleneck identified through its
                 characterization result, which improves 16.8\%
                 performance.",
  acknowledgement = ack-nhfb,
  articleno =    "25",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J967",
}

@Article{Lopes:2019:FBD, author = "Paulo A. C.
Lopes and Satyendra Singh Yadav and Aleksandar Ilic and Sarat Kumar Patra", title = "Fast block distributed {CUDA} implementation of the {Hungarian} algorithm", journal = j-J-PAR-DIST-COMP, volume = "130", number = "??", pages = "50--62", month = aug, year = "2019", CODEN = "JPDCER", DOI = "https://doi.org/10.1016/j.jpdc.2019.03.014", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Mon May 20 18:06:40 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0743731519302254", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", }

@Article{Lopez-Gomez:2019:ESP,
  author =       "Javier L{\'o}pez-G{\'o}mez and Javier Fern{\'a}ndez
                 Mu{\~n}oz and David del Rio Astorga and Manuel F. Dolz
                 and J. Daniel Garcia",
  title =        "Exploring stream parallel patterns in distributed
                 {MPI} environments",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "84",
  number =       "??",
  pages =        "24--36",
  month =        may,
  year =         "2019",
  CODEN =        "PACOEJ",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Mon Oct 14 16:20:01 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819118303442",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}

@Article{Lorenzon:2019:ASO,
  author =       "A. F. Lorenzon and C. C. {de Oliveira} and J. D. Souza
                 and A. C. S. Beck",
  title =        "{Aurora}: Seamless Optimization of {OpenMP}
                 Applications",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "30",
  number =       "5",
  pages =        "1007--1021",
  month =        may,
  year =         "2019",
  CODEN =        "ITDSEO",
  DOI =          "https://doi.org/10.1109/TPDS.2018.2872992",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Fri Aug 30 06:09:58 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed
                 Systems",
  journal-URL =  "http://www.computer.org/portal/web/csdl/transactions/tpds",
  keywords =     "application program interfaces; Aurora; code
                 transformation; efficiency 91.0 percent; Energy-Delay
                 Product; Feedback-Driven Threading; Hardware;
                 Instruction sets; Ion radiation effects; Magnetosphere;
                 message passing; Message systems; Microarchitecture;
                 multicore processors; multiprocessing systems; OpenMP;
                 OpenMP applications; OpenMP feature; OpenMP framework;
                 optimization; parallel applications; parallel loop
                 region; parallel processing; Runtime; runtime
                 environments; seamless optimization; software
                 developers; standard OpenMP execution; thread-level
                 parallelism; Thread-level parallelism exploitation",
}

@Article{Losada:2019:LRR,
  author =       "Nuria Losada and George Bosilca and Aur{\'e}lien
                 Bouteiller and Patricia Gonz{\'a}lez and Mar{\'\i}a J.
                 Mart{\'\i}n",
  title =        "Local rollback for resilient {MPI} applications with
                 application-level checkpointing and message logging",
  journal =      j-FUT-GEN-COMP-SYS,
  volume =       "91",
  number =       "??",
  pages =        "450--464",
  month =        feb,
  year =         "2019",
  CODEN =        "FGSEVI",
  ISSN =         "0167-739X (print), 1872-7115 (electronic)",
  ISSN-L =       "0167-739X",
  bibdate =      "Tue Feb 5 08:15:51 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/futgencompsys.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://www.sciencedirect.com/science/article/pii/S0167739X18303443",
  acknowledgement = ack-nhfb,
  fjournal =     "Future Generation Computer Systems",
  journal-URL =  "http://www.sciencedirect.com/science/journal/0167739X",
}

@Article{Lu:2019:PMM,
  author =       "Gangzhao Lu and Weizhe Zhang and Hui He and Laurence
                 T. Yang",
  title =        "Performance modeling for {MPI} applications with low
                 overhead fine-grained profiling",
  journal =      j-FUT-GEN-COMP-SYS,
  volume =       "90",
  number =       "??",
  pages =        "317--326",
  month =        jan,
  year =         "2019",
  CODEN =        "FGSEVI",
  DOI =          "https://doi.org/10.1016/j.future.2018.08.018",
  ISSN =         "0167-739X (print), 1872-7115 (electronic)",
  ISSN-L =       "0167-739X",
  bibdate =      "Tue Sep 18 14:07:59 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/futgencompsys.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167739X18308252",
  acknowledgement = ack-nhfb,
  fjournal =     "Future Generation Computer Systems",
  journal-URL =  "http://www.sciencedirect.com/science/journal/0167739X",
}

@Article{Mercan:2019:CCH,
  author =       "H. Mercan and C. Yilmaz and K. Kaya",
  title =        "{CHiP}: A Configurable Hybrid Parallel Covering Array
                 Constructor",
  journal =      j-IEEE-TRANS-SOFTW-ENG,
  volume =       "45",
  number =       "12",
  pages =        "1270--1291",
  month =        dec,
  year =         "2019",
  CODEN =        "IESEDJ",
  DOI =          "https://doi.org/10.1109/TSE.2018.2837759",
  ISSN =         "0098-5589 (print), 1939-3520 (electronic)",
  ISSN-L =       "0098-5589",
  bibdate =      "Thu Dec 12 06:35:49 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/ieeetranssoftweng2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Software Engineering",
  journal-URL =  "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=32",
  keywords =     "Benchmark testing; constraint satisfaction problem;
                 Covering arrays; CUDA; Graphics processing units;
                 graphics processing units; metaheuristic search;
                 parallel computing; Parallel processing; Scalability;
                 Simulated annealing; Upper bound",
}

@Article{Mironov:2019:EMO,
  author =       "Vladimir Mironov and Alexander Moskovsky and Michael
                 D'Mello and Yuri Alexeev",
  title =        "An efficient {MPI\slash OpenMP} parallelization of the
                 {Hartree--Fock--Roothaan} method for the first
                 generation of {Intel{\reg} Xeon Phi{\TM}} processor
                 architecture",
  journal =      j-IJHPCA,
  volume =       "33",
  number =       "1",
  pages =        "212--224",
  day =          "1",
  month =        jan,
  year =         "2019",
  CODEN =        "IHPCFL",
  DOI =          "https://doi.org/10.1177/1094342017732628",
  ISSN =         "1094-3420 (print), 1741-2846 (electronic)",
  ISSN-L =       "1094-3420",
  bibdate =      "Wed Oct 9 14:35:53 MDT 2019",
  bibsource =    "http://hpc.sagepub.com/;
                 https://www.math.utah.edu/pub/tex/bib/ijsa.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://journals.sagepub.com/doi/full/10.1177/1094342017732628",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of High Performance Computing
                 Applications",
  journal-URL =  "https://journals.sagepub.com/home/hpc",
}

@Article{Oh:2019:HPT,
  author =       "S. Oh and N. Park and J. Jang and L. Sael and U.
                 Kang",
  title =        "High-Performance {Tucker} Factorization on
                 Heterogeneous Platforms",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "30",
  number =       "10",
  pages =        "2237--2248",
  month =        oct,
  year =         "2019",
  CODEN =        "ITDSEO",
  DOI =          "https://doi.org/10.1109/TPDS.2019.2908639",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Thu Dec 19 09:20:35 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed
                 Systems",
  journal-URL =  "http://www.computer.org/portal/web/csdl/transactions/tpds",
  keywords =     "Computer science; factor matrices; GPGPU; graph
                 theory; Graphics processing units; GTA scales;
                 GTA-FULL; GTA-PART; heterogeneous computing;
                 Heterogeneous networks; heterogeneous platforms;
                 high-performance Tucker factorization; large-scale
                 multidimensional data; matrix decomposition; Memory
                 management; memory requirements; Motion pictures;
                 OpenCL; row-wise update rule; Scalability; Tensor
                 analysis; tensor factorization algorithms; tucker
                 factorization",
}

@Article{Ortega:2019:CAC,
  author =       "G. Ortega and E. M. T. Hendrix and I. Garc{\'\i}a",
  title =        "A {CUDA} approach to compute perishable inventory
                 control policies using value iteration",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "75",
  number =       "3",
  pages =        "1580--1593",
  month =        mar,
  year =         "2019",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-018-2692-z",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Thu Oct 10 15:31:17 MDT 2019",
  bibsource =    "http://link.springer.com/journal/11227/75/3;
                 https://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/content/pdf/10.1007/s11227-018-2692-z.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
}

@Article{Otero:2019:OAA,
  author =       "Evelyn Otero and Jing Gong and Misun Min and Paul
                 Fischer and Philipp Schlatter and Erwin Laure",
  title =        "{OpenACC} acceleration for the {$ P_N $--$ P_{N - 2}
                 $} algorithm in {Nek5000}",
  journal =      j-J-PAR-DIST-COMP,
  volume =       "132",
  number =       "??",
  pages =        "69--78",
  month =        oct,
  year =         "2019",
  CODEN =        "JPDCER",
  DOI =          "https://doi.org/10.1016/j.jpdc.2019.05.010",
  ISSN =         "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L =       "0743-7315",
  bibdate =      "Fri Sep 13 10:25:20 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0743731518305549",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Parallel and Distributed Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/07437315",
}

@Article{Park:2019:DBO,
  author =       "Sanghyun Park and Taeweon Suh",
  title =        "{DQN}-based {OpenCL} workload partition for
                 performance optimization",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "75",
  number =       "8",
  pages =        "4875--4893",
  month =        aug,
  year =         "2019",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-019-02766-0",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Thu Oct 10 15:31:21 MDT 2019",
  bibsource =    "http://link.springer.com/journal/11227/75/8;
                 https://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
}

@Article{Perez:2019:ATO,
  author =       "B. P{\'e}rez and E. Stafford and J. L. Bosque and R.
                 Beivide and S. Mateo and X. Teruel and X. Martorell and
                 E. Ayguad{\'e}",
  title =        "Auto-tuned {OpenCL} kernel co-execution in {OmpSs} for
                 heterogeneous systems",
  journal =      j-J-PAR-DIST-COMP,
  volume =       "125",
  number =       "??",
  pages =        "45--57",
  month =        mar,
  year =         "2019",
  CODEN =        "JPDCER",
  DOI =          "https://doi.org/10.1016/j.jpdc.2018.11.001",
  ISSN =         "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L =       "0743-7315",
  bibdate =      "Mon Jan 7 07:58:40 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0743731518308189",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Parallel and Distributed Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/07437315",
}

@Article{Pikle:2019:AFE, author = "Nileshchandra K. Pikle and Shailesh R. Sathe and Arvind Y.
Vyavahare", title = "Accelerating the finite element analysis of functionally graded materials using fixed-grid strategy on {CUDA}-enabled {GPUs}", journal = j-CCPE, volume = "31", number = "17", pages = "e5207:1--e5207:??", day = "10", month = sep, year = "2019", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.5207", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Sat Oct 12 11:00:05 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "03 April 2019", }

@Article{Pirkelbauer:2019:BTF,
  author =       "Peter Pirkelbauer and Amalee Wilson and Christina
                 Peterson and Damian Dechev",
  title =        "{Blaze-Tasks}: a Framework for Computing Parallel
                 Reductions over Tasks",
  journal =      j-TACO,
  volume =       "15",
  number =       "4",
  pages =        "66:1--66:??",
  month =        jan,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3293448",
  ISSN =         "1544-3566 (print), 1544-3973 (electronic)",
  ISSN-L =       "1544-3566",
  bibdate =      "Tue Jan 8 17:20:00 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/taco.bib",
  abstract =     "Compared to threads, tasks are a more fine-grained
                 alternative. The task parallel programming model offers
                 benefits in terms of better performance portability and
                 better load-balancing for problems that exhibit
                 nonuniform workloads. A common scenario of task
                 parallel programming is that a task is recursively
                 decomposed into smaller sub-tasks. Depending on the
                 problem domain, the number of created sub-tasks may be
                 nonuniform, thereby creating potential for significant
                 load imbalances in the system. Dynamic load-balancing
                 mechanisms will distribute the tasks across available
                 threads. The final result of a computation may be
                 modeled as a reduction over the results of all
                 sub-tasks. This article describes a simple, yet
                 effective prototype framework, Blaze-Tasks, for task
                 scheduling and task reductions on shared memory
                 architectures. The framework has been designed with
                 lock-free techniques and generic programming principles
                 in mind. Blaze-Tasks is implemented entirely in C++17
                 and is thus portable. To load-balance the computation,
                 Blaze-Tasks uses task stealing. To manage contention on
                 a task pool, the number of lock-free attempts to steal
                 a task depends on the distance between thief and pool
                 owner and the estimated number of tasks in a victim's
                 pool. This article evaluates the Blaze framework on
                 Intel and IBM dual-socket systems using nine benchmarks
                 and compares its performance with other task parallel
                 frameworks. While Cilk outperforms Blaze on Intel on
                 most benchmarks, the evaluation shows that Blaze is
                 competitive with OpenMP and other library-based
                 implementations. On IBM, the experiments show that
                 Blaze outperforms other approaches on most
                 benchmarks.",
  acknowledgement = ack-nhfb,
  articleno =    "66",
  fjournal =     "ACM Transactions on Architecture and Code Optimization
                 (TACO)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J924",
}

@Article{Prades:2019:GJM,
  author =       "J. Prades and F. Silla",
  title =        "{GPU}-Job Migration: The {rCUDA} Case",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "30",
  number =       "12",
  pages =        "2718--2729",
  month =        dec,
  year =         "2019",
  CODEN =        "ITDSEO",
  DOI =          "https://doi.org/10.1109/TPDS.2019.2924433",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Thu Dec 19 09:20:35 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed
                 Systems",
  journal-URL =  "http://www.computer.org/portal/web/csdl/transactions/tpds",
  keywords =     "CUDA; GPU; Graphics processing units; Middleware;
                 migration; Proposals; rCUDA; Resource management;
                 Virtual machining; virtualization; Virtualization",
}

@Article{Reano:2019:APP,
  author =       "Carlos Rea{\~n}o and Javier Prades and Federico
                 Silla",
  title =        "Analyzing the performance\slash power tradeoff of the
                 {rCUDA} middleware for future exascale systems",
  journal =      j-J-PAR-DIST-COMP,
  volume =       "132",
  number =       "??",
  pages =        "344--362",
  month =        oct,
  year =         "2019",
  CODEN =        "JPDCER",
  ISSN =         "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L =       "0743-7315",
  bibdate =      "Fri Sep 13 10:25:20 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0743731519303491",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Parallel and Distributed Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/07437315",
}

@Article{Reano:2019:SIN,
  author =       "Carlos Rea{\~n}o and Federico Silla",
  title =        "On the support of inter-node {P2P} {GPU} memory copies
                 in {rCUDA}",
  journal =      j-J-PAR-DIST-COMP,
  volume =       "127",
  number =       "??",
  pages =        "28--43",
  month =        may,
  year =         "2019",
  CODEN =        "JPDCER",
  DOI =          "https://doi.org/10.1016/j.jpdc.2018.12.011",
  ISSN =         "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L =       "0743-7315",
  bibdate =      "Thu Mar 14 15:55:59 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0743731519300255",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Parallel and Distributed Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/07437315",
}

@Article{Riebler:2019:TAH,
  author =       "Heinrich Riebler and Gavin Vaz and Tobias Kenter and
                 Christian Plessl",
  title =        "Transparent Acceleration for Heterogeneous Platforms
                 With Compilation to {OpenCL}",
  journal =      j-TACO,
  volume =       "16",
  number =       "2",
  pages =        "14:1--14:??",
  month =        may,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3319423",
  ISSN =         "1544-3566 (print), 1544-3973 (electronic)",
  ISSN-L =       "1544-3566",
  bibdate =      "Fri Jul 26 14:25:54 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/taco.bib",
  abstract =     "Multi-accelerator platforms combine CPUs and different
                 accelerator architectures within a single compute node.
                 Such systems are capable of processing parallel
                 workloads very efficiently while being more energy
                 efficient than regular systems consisting of CPUs only.
                 However, the architectures of such systems are diverse,
                 forcing developers to port applications to each
                 accelerator using different programming languages,
                 models, tools, and compilers. Developers not only
                 require domain-specific knowledge but also need to
                 understand the low-level accelerator details, leading
                 to an increase in the design effort and costs. To
                 tackle this challenge, we propose a compilation
                 approach and a practical realization called HTrOP that
                 is completely transparent to the user. HTrOP is able to
                 automatically analyze a sequential CPU application,
                 detect computational hotspots, and generate parallel
                 OpenCL host and kernel code. The potential of HTrOP is
                 demonstrated by offloading hotspots to different
                 OpenCL-enabled resources (currently the CPU, the
                 general-purpose GPU, and the manycore Intel Xeon Phi)
                 for a broad set of benchmark applications. We present
                 an in-depth evaluation of our approach in terms of
                 performance gains and energy savings, taking into
                 account all static and dynamic overheads. We are able
                 to achieve speedups and energy savings of up to two
                 orders of magnitude, if an application has sufficient
                 computational intensity, when compared to a natively
                 compiled application.",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "ACM Transactions on Architecture and Code Optimization
                 (TACO)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J924",
}

@Article{Roth:2019:AOC,
  author =       "{\'A}goston R{\'o}th",
  title =        "Algorithm 992: An {OpenGL}- and {C++}-based Function
                 Library for Curve and Surface Modeling in a Large Class
                 of Extended {Chebyshev} Spaces",
  journal =      j-TOMS,
  volume =       "45",
  number =       "1",
  pages =        "13:1--13:32",
  month =        mar,
  year =         "2019",
  CODEN =        "ACMSCU",
  DOI =          "https://doi.org/10.1145/3284979",
  ISSN =         "0098-3500 (print), 1557-7295 (electronic)",
  ISSN-L =       "0098-3500",
  bibdate =      "Mon May 6 18:23:42 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/toms.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3284979",
  abstract =     "We propose a platform-independent multi-threaded
                 function library that provides data structures to
                 generate, differentiate, and render both the ordinary
                 basis and the normalized B-basis of a user-specified
                 extended Chebyshev (EC) space that comprises the
                 constants and can be identified with the solution space
                 of a constant-coefficient homogeneous linear
                 differential equation defined on a sufficiently small
                 interval. Using the obtained normalized B-bases, our
                 library can also generate, (partially) differentiate,
                 modify, and visualize a large family of so-called
                 B-curves and tensor product B-surfaces. Moreover, the
                 library also implements methods that can be used to
                 perform dimension elevation, to subdivide B-curves and
                 B-surfaces by means of de Casteljau-like B-algorithms,
                 and to generate basis transformations for the
                 B-representation of arbitrary integral curves and
                 surfaces that are described in traditional parametric
                 form by means of the ordinary bases of the underlying
                 EC spaces. Independently of the algebraic, exponential,
                 trigonometric, or mixed type of the applied EC space,
                 the proposed library is numerically stable and
                 efficient up to a reasonable dimension number and may
                 be useful for academics and engineers in the fields of
                 Approximation Theory, Computer Aided Geometric Design,
                 Computer Graphics, and Isogeometric and Numerical
                 Analysis.",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "ACM Transactions on Mathematical Software (TOMS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J782",
}

@Article{Ruhela:2019:EDM,
  author =       "Amit Ruhela and Hari Subramoni and Sourav Chakraborty
                 and Mohammadreza Bayatpour and Pouya Kousha and
                 Dhabaleswar K. (DK) Panda",
  title =        "Efficient design for {MPI} asynchronous progress
                 without dedicated resources",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "85",
  number =       "??",
  pages =        "13--26",
  month =        jul,
  year =         "2019",
  CODEN =        "PACOEJ",
  DOI =          "https://doi.org/10.1016/j.parco.2019.03.003",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Mon Oct 14 16:20:01 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819118303302",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}

@Article{Sala:2019:IBN,
  author =       "Kevin Sala and Xavier Teruel and Josep M. Perez and
                 Antonio J. Pe{\~n}a and Vicen{\c{c}} Beltran and Jesus
                 Labarta",
  title =        "Integrating blocking and non-blocking {MPI} primitives
                 with task-based programming models",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "85",
  number =       "??",
  pages =        "153--166",
  month =        jul,
  year =         "2019",
  CODEN =        "PACOEJ",
  DOI =          "https://doi.org/10.1016/j.parco.2018.12.008",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Mon Oct 14 16:20:01 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819118303326",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}

@Article{Schardl:2019:TER, author = "Tao B. Schardl and William S. Moses and Charles E.
Leiserson", title = "{Tapir}: Embedding Recursive Fork-join Parallelism into {LLVM}'s Intermediate Representation", journal = j-TOPC, volume = "6", number = "4", pages = "19:1--19:??", month = dec, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3365655", ISSN = "2329-4949 (print), 2329-4957 (electronic)", ISSN-L = "2329-4949", bibdate = "Fri Dec 27 16:13:12 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/topc.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3365655", abstract = "Tapir (pronounced TAY-per) is a compiler intermediate representation (IR) that embeds recursive fork-join parallelism, as supported by task-parallel programming platforms such as Cilk and OpenMP, into a mainstream compiler's IR. Mainstream compilers typically treat parallel linguistic constructs as syntactic sugar for function calls into a parallel runtime. These calls prevent the compiler from performing optimizations on and across parallel control constructs. Remedying this situation has generally been thought to require an extensive reworking of compiler analyses and code transformations to handle parallel semantics. Tapir leverages the ``serial-projection property,'' which is commonly satisfied by task-parallel programs, to handle the semantics of these programs without an extensive rework of the compiler. For recursive fork-join programs that satisfy the serial-projection property, Tapir enables effective compiler optimization of parallel programs with only minor changes to existing compiler analyses and code transformations. Tapir uses the serial-projection property to order logically parallel fine-grained tasks in the program's control-flow graph. This ordered representation of parallel tasks allows the compiler to optimize parallel codes effectively with only minor modifications. 
For example, to implement Tapir/LLVM, a prototype of Tapir in the LLVM compiler, we added or modified less than 3,000 lines of LLVM's half-million-line core middle-end functionality. These changes sufficed to enable LLVM's existing compiler optimizations for serial code---including loop-invariant-code motion, common-subexpression elimination, and tail-recursion elimination---to work with parallel control constructs such as parallel loops and Cilk's Cilk\_Spawn keyword. Tapir also supports parallel optimizations, such as loop scheduling, which restructure the parallel control flow of the program. By making use of existing LLVM optimizations and new parallel optimizations, Tapir/LLVM can optimize recursive fork-join programs more effectively than traditional compilation methods. On a suite of 35 Cilk application benchmarks, Tapir/LLVM produces more efficient executables for 30 benchmarks, with faster 18-core running times for 26 of them, compared to a nearly identical compiler that compiles parallel linguistic constructs the traditional way.", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM Transactions on Parallel Computing", journal-URL = "http://dl.acm.org/citation.cfm?id=2632163", } @Article{Searles:2019:MOA, author = "Robert Searles and Sunita Chandrasekaran and Wayne Joubert and Oscar Hernandez", title = "{MPI + OpenACC}: Accelerating radiation transport mini-application, minisweep, on heterogeneous systems", journal = j-COMP-PHYS-COMM, volume = "236", number = "??", pages = "176--187", month = mar, year = "2019", CODEN = "CPHCBZ", DOI = "https://doi.org/10.1016/j.cpc.2018.10.007", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Mon Jan 28 16:49:58 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465518303552", acknowledgement = ack-nhfb, fjournal = "Computer Physics
Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Sharif:2019:APC, author = "Hashim Sharif and Prakalp Srivastava and Muhammad Huzaifa and Maria Kotsifakou and Keyur Joshi and Yasmin Sarita and Nathan Zhao and Vikram S. Adve and Sasa Misailovic and Sarita Adve", title = "{ApproxHPVM}: a portable compiler {IR} for accuracy-aware optimizations", journal = j-PACMPL, volume = "3", number = "OOPSLA", pages = "186:1--186:30", month = oct, year = "2019", DOI = "https://doi.org/10.1145/3360612", bibdate = "Fri Aug 7 19:22:30 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/pacmpl.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3360612", abstract = "We propose ApproxHPVM, a compiler IR and system designed to enable accuracy-aware performance and energy tuning on heterogeneous systems with multiple compute units and approximation methods. ApproxHPVM automatically translates end-to-end application-portability across heterogeneous hardware platforms and enables future capabilities like accuracy-aware dynamic scheduling and design space exploration.\par ApproxHPVM incorporates three main components: (a) a compiler IR with hardware-agnostic approximation metrics, (b) a hardware-agnostic accuracy-tuning phase to identify error-tolerant computations, and (c) an accuracy-aware hardware scheduler that maps error-tolerant computations to approximate hardware components. As ApproxHPVM does not incorporate any hardware-specific knowledge as part of the IR, it can serve as a portable virtual ISA that can be shipped to all kinds of hardware platforms.\par We evaluate our framework on nine benchmarks from the deep learning domain and five image processing benchmarks. 
Our results show that our framework can offload chunks of approximable computations to special-purpose accelerators that provide significant gains in performance and energy, while staying within user-specified application-level quality metrics with high probability. Across the 14 benchmarks, we observe from $1$--$ 9 \times $ performance speedups and $ 1.1$--$ 11.3 \times $ energy reduction for very small reductions in accuracy.", acknowledgement = ack-nhfb, articleno = "186", fjournal = "Proceedings of the ACM on Programming Languages", journal-URL = "https://pacmpl.acm.org/", } @Article{Shea:2019:HSD, author = "Colin Shea and Tinoosh Mohsenin", title = "Heterogeneous Scheduling of Deep Neural Networks for Low-power Real-time Designs", journal = j-JETC, volume = "15", number = "4", pages = "36:1--36:??", month = dec, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358699", ISSN = "1550-4832", bibdate = "Tue Dec 17 07:50:24 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358699", abstract = "Deep neural networks have become the readiest answer to a range of application challenges including image recognition, stock analysis, natural language processing, and biomedical applications such as seizure detection. All while outperforming prior leading solutions that relied heavily on hand-engineered techniques. However, deployment of these neural networks often requires high-computational and memory-intensive solutions. These requirements make it challenging to deploy Deep Neural Networks (DNNs) in embedded, real-time low-power applications where classic architectures, GPUs and CPUs, still impose significant power burden. 
Systems-on-Chip (SoC) with Field-programmable Gate Arrays (FPGAs) can be used to improve performance and allow more fine-grain control of resources than CPUs or GPUs, but it is difficult to find the optimal balance between hardware and software to improve DNN efficiency. In the current research literature there have been few proposed solutions to address optimizing hardware and software deployments of DNNs in embedded low-power systems. To address the computation resource restriction and low-power needs for deploying these networks, we describe and implement a domain-specific metric model for optimizing task deployment on differing platforms, hardware and software. Next, we propose a DNN hardware accelerator called Scalable Low-power Accelerator for real-time deep neural Networks (SCALENet) that includes multithreaded software workers. Finally, we propose a heterogeneous aware scheduler that uses the DNN-specific metric models and the SCALENet accelerator to allocate a task to a resource based on solving a numerical cost for a series of domain objectives. To demonstrate the applicability of our contribution, we deploy nine modern deep network architectures, each containing a different number of parameters within the context of two different neural network applications: image processing and biomedical seizure detection. Utilizing the metric modeling techniques integrated into the heterogeneous aware scheduler and the SCALENet accelerator, we demonstrate the ability to meet computational requirements, adapt to multiple architectures, and lower power by providing an optimized task to resource allocation. Our heterogeneous aware scheduler improves power saving by decreasing power consumption by 10\% of the total system power, does not affect the accuracy of the networks, and still meets the real-time deadlines. 
We demonstrate the ability to achieve parity with or exceed the energy efficiency of NVIDIA GPUs when evaluated against Jetson TK1 with embedded GPU SoC and with a 4$ \times $ power savings in a power envelope of 2.0W. When compared to existing FPGA-based accelerators, SCALENet's accelerator and heterogeneous aware scheduler achieves a 4$ \times $ improvement in energy efficiency.", acknowledgement = ack-nhfb, articleno = "36", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Shekofteh:2019:MSG, author = "S.-Kazem Shekofteh and Hamid Noori and Mahmoud Naghibzadeh and Hadi Sadoghi Yazdi and Holger Fr{\"o}ning", title = "Metric Selection for {GPU} Kernel Classification", journal = j-TACO, volume = "15", number = "4", pages = "68:1--68:??", month = jan, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3295690", ISSN = "1544-3566 (print), 1544-3973 (electronic)", ISSN-L = "1544-3566", bibdate = "Tue Jan 8 17:20:00 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/taco.bib", abstract = "Graphics Processing Units (GPUs) are vastly used for running massively parallel programs. GPU kernels exhibit different behavior at runtime and can usually be classified in a simple form as either ``compute-bound'' or ``memory-bound.'' Recent GPUs are capable of concurrently running multiple kernels, which raises the question of how to most appropriately schedule kernels to achieve higher performance. In particular, co-scheduling of compute-bound and memory-bound kernels seems promising. However, its benefits as well as drawbacks must be determined along with which kernels should be selected for a concurrent execution. Classifying kernels can be performed online by instrumentation based on performance counters. 
This work conducts a thorough analysis of the metrics collected from various benchmarks from Rodinia and CUDA SDK. The goal is to find the minimum number of effective metrics that enables online classification of kernels with a low overhead. This study employs a wrapper-based feature selection method based on the Fisher feature selection criterion. The results of experiments show that to classify kernels with a high accuracy, only three and five metrics are sufficient on a Kepler and a Pascal GPU, respectively. The proposed method is then utilized for a runtime scheduler. The results show an average speedup of 1.18$ \times $ and 1.1$ \times $ compared with a serial and a random scheduler, respectively.", acknowledgement = ack-nhfb, articleno = "68", fjournal = "ACM Transactions on Architecture and Code Optimization (TACO)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J924", } @Article{Shterenlikht:2019:MVF, author = "Anton Shterenlikht and Luis Cebamanos", title = "{MPI} vs {Fortran} coarrays beyond 100k cores: {$3$D} cellular automata", journal = j-PARALLEL-COMPUTING, volume = "84", number = "??", pages = "37--49", month = may, year = "2019", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Mon Oct 14 16:20:01 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/fortran3.bib; https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819118303181", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Simmendinger:2019:ISG, author = "Christian Simmendinger and Roman Iakymchuk and Luis Cebamanos and Dana Akhmetova and Valeria Bartsch and Tiberiu Rotaru and Mirko Rahn and Erwin Laure and Stefano Markidis", title = "Interoperability strategies for {GASPI} and {MPI} in large-scale scientific 
applications", journal = j-IJHPCA, volume = "33", number = "3", pages = "554--568", day = "1", month = may, year = "2019", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342018808359", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Wed Oct 9 14:35:53 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/ijsa.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://journals.sagepub.com/doi/full/10.1177/1094342018808359", acknowledgement = ack-nhfb, fjournal = "International Journal of High Performance Computing Applications", journal-URL = "https://journals.sagepub.com/home/hpc", } @Article{Song:2019:PGA, author = "You Song and Siyu Yang and Jinzhi Lei", title = "{ParaCells}: a {GPU} Architecture for Cell-Centered Models in Computational Biology", journal = j-TCBB, volume = "16", number = "3", pages = "994--1006", month = may, year = "2019", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2018.2814570", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Aug 23 11:22:19 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", abstract = "In computational biology, the hierarchy of biological systems requires the development of flexible and powerful computational tools. Graphics processing unit (GPU) architecture has been a suitable device for parallel computing in simulating multi-cellular systems. However, in modeling complex biological systems, scientists often face two tasks, mathematical formulation and skillful programming. In particular, specific programming skills are needed for GPU programming. Therefore, the development of an easy-to-use computational architecture, which utilizes GPU for parallel computing and provides intuitive interfaces for simple implementation, is needed so that general scientists can perform GPU simulations without knowing much about the GPU architecture.
Here, we introduce ParaCells, a cell-centered GPU simulation architecture for NVIDIA compute unified device architecture (CUDA). ParaCells was designed as a versatile architecture that connects the user logic in C++ with NVIDIA CUDA runtime and is specific to the modeling of multi-cellular systems. An advantage of ParaCells is its object-oriented model declaration, which allows it to be widely applied to many biological systems through the combination of basic biological concepts. We test ParaCells with two applications. Both applications are significantly faster when compared with sequential as well as parallel OpenMP and OpenACC implementations. Moreover, the simulation programs based on ParaCells are cleaner and more readable than other versions.", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954", } @Article{Speck:2019:APP, author = "Robert Speck", title = "{Algorithm 997}: {pySDC}---Prototyping Spectral Deferred Corrections", journal = j-TOMS, volume = "45", number = "3", pages = "35:1--35:23", month = aug, year = "2019", CODEN = "ACMSCU", DOI = "https://doi.org/10.1145/3310410", ISSN = "0098-3500 (print), 1557-7295 (electronic)", ISSN-L = "0098-3500", bibdate = "Tue Sep 3 17:49:22 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/toms.bib", URL = "https://dl.acm.org/citation.cfm?id=3310410", abstract = "In this article, we present the Python framework pySDC for solving collocation problems with spectral deferred correction (SDC) methods and their time-parallel variant PFASST, the parallel full approximation scheme in space and time. pySDC features many implementations of SDC and PFASST, from simple implicit timestepping to high-order implicit-explicit or multi-implicit splitting and multilevel SDCs.
The software package comes with many different, preimplemented examples and has seven tutorials to help new users with their first steps. Time parallelism is implemented either in an emulated way for debugging and prototyping or using MPI for benchmarking. The code is fully documented and tested using continuous integration, including most results of previous publications. Here, we describe the structure of the code by taking two different perspectives: those of the user and those of the developer. The first sheds light on the front-end, the examples, and the tutorials, and the second is used to describe the underlying implementation and the data structures. We show three different examples to highlight various aspects of the implementation, the capabilities, and the usage of pySDC. In addition, couplings to the FEniCS framework and PETSc, the latter including spatial parallelism with MPI, are described.", acknowledgement = ack-nhfb, articleno = "35", fjournal = "ACM Transactions on Mathematical Software (TOMS)", journal-URL = "http://dl.acm.org/pub.cfm?id=J782", } @Article{St-Onge:2019:ESS, author = "Guillaume St-Onge and Jean-Gabriel Young and Laurent H{\'e}bert-Dufresne and Louis J. 
Dub{\'e}", title = "Efficient sampling of spreading processes on complex networks using a composition and rejection algorithm", journal = j-COMP-PHYS-COMM, volume = "240", number = "??", pages = "30--37", month = jul, year = "2019", CODEN = "CPHCBZ", DOI = "https://doi.org/10.1016/j.cpc.2019.02.008", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Fri Jun 14 08:12:51 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465519300608", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Sultana:2019:FRB, author = "Nawrin Sultana and Martin R{\"u}fenacht and Anthony Skjellum and Ignacio Laguna and Kathryn Mohror", title = "Failure recovery for bulk synchronous applications with {MPI} stages", journal = j-PARALLEL-COMPUTING, volume = "84", number = "??", pages = "1--14", month = may, year = "2019", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Mon Oct 14 16:20:01 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819118303260", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Tang:2019:MNT, author = "Yibin Tang and Ying Wang and Huawei Li and Xiaowei Li", title = "{MV-Net}: Toward Real-Time Deep Learning on Mobile {GPGPU} Systems", journal = j-JETC, volume = "15", number = "4", pages = "35:1--35:??", month = dec, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3358696", ISSN = "1550-4832", bibdate = "Tue Dec 17 07:50:24 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jetc.bib; 
https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3358696", abstract = "Recently the development of deep learning has been propelling the sheer growth of vision and speech applications on lightweight embedded and mobile systems. However, the limitation of computation resource and power delivery capability in embedded platforms is recognized as a significant bottleneck that prevents the systems from providing real-time deep learning ability, since the inference of deep convolutional neural networks (CNNs) and recurrent neural networks (RNNs) involves large quantities of weights and operations. Particularly, how to provide quality-of-services (QoS)-guaranteed neural network inference ability in the multitask execution environment of multicore SoCs is even more complicated due to the existence of resource contention. In this article, we present a novel deep neural network architecture, MV-Net, which provides performance elasticity and contention-aware self-scheduling ability for QoS enhancement in mobile computing systems. When the constraints of QoS, output accuracy, and resource contention status of the system change, MV-Net can dynamically reconfigure the corresponding neural network propagation paths and thus achieves an effective tradeoff between neural network computational complexity and prediction accuracy via approximate computing. 
The experimental results show that (1) MV-Net significantly improves the performance flexibility of current CNN models and makes it possible to provide always-guaranteed QoS in a multitask environment, and (2) it satisfies the quality-of-results (QoR) requirement, outperforming the baseline implementation significantly, and improves the system energy efficiency at the same time.", acknowledgement = ack-nhfb, articleno = "35", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967", } @Article{Tang:2019:QDL, author = "Xulong Tang and Ashutosh Pattnaik and Onur Kayiran and Adwait Jog and Mahmut Taylan Kandemir and Chita Das", title = "Quantifying Data Locality in Dynamic Parallelism in {GPUs}", journal = j-SIGMETRICS, volume = "47", number = "1", pages = "25--26", month = dec, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3376930.3376947", ISSN = "0163-5999 (print), 1557-9484 (electronic)", ISSN-L = "0163-5999", bibdate = "Mon Jan 27 06:15:26 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigmetrics.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3376930.3376947", abstract = "Dynamic parallelism (DP) is a new feature of emerging GPUs that allows new kernels to be generated and scheduled from the device-side (GPU) without the host-side (CPU) intervention. 
To efficiently support DP, one of the major challenges is to saturate the \ldots{}", acknowledgement = ack-nhfb, fjournal = "ACM SIGMETRICS Performance Evaluation Review", journal-URL = "https://dl.acm.org/loi/sigmetrics", } @Article{Teijeiro:2019:OPS, author = "Carlos Teijeiro and Thomas Hammerschmidt and Ralf Drautz and Godehard Sutmann", title = "Optimized parallel simulations of analytic bond-order potentials on hybrid shared\slash distributed memory with {MPI} and {OpenMP}", journal = j-IJHPCA, volume = "33", number = "2", pages = "227--241", day = "1", month = mar, year = "2019", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342017727060", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Wed Oct 9 14:35:53 MDT 2019", bibsource = "http://hpc.sagepub.com/; https://www.math.utah.edu/pub/tex/bib/ijsa.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://journals.sagepub.com/doi/full/10.1177/1094342017727060", acknowledgement = ack-nhfb, fjournal = "International Journal of High Performance Computing Applications", journal-URL = "https://journals.sagepub.com/home/hpc", } @Article{Teunissen:2019:GML, author = "J. Teunissen and R. 
Keppens", title = "A geometric multigrid library for quadtree\slash octree {AMR} grids coupled to {MPI-AMRVAC}", journal = j-COMP-PHYS-COMM, volume = "245", number = "??", pages = "Article 106866", month = dec, year = "2019", CODEN = "CPHCBZ", DOI = "https://doi.org/10.1016/j.cpc.2019.106866", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Tue Oct 29 11:44:58 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S001046551930253X", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Tian:2019:GAB, author = "Tian Tian and Dunwei Gong and Fei-Ching Kuo and Huai Liu", title = "Genetic algorithm based test data generation for {MPI} parallel programs with blocking communication", journal = j-J-SYST-SOFTW, volume = "155", number = "??", pages = "130--144", month = sep, year = "2019", CODEN = "JSSODM", ISSN = "0164-1212 (print), 1873-1228 (electronic)", ISSN-L = "0164-1212", bibdate = "Wed Oct 16 06:54:20 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/jsystsoftw.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0164121219300810", acknowledgement = ack-nhfb, fjournal = "Journal of Systems and Software", journal-URL = "http://www.sciencedirect.com/science/journal/01641212", } @Article{Tu:2019:AOS, author = "Chia-Heng Tu and Te-Sheng Lin", title = "Augmenting Operating Systems with {OpenCL} Accelerators", journal = j-TODAES, volume = "24", number = "3", pages = "30:1--30:29", month = jun, year = "2019", CODEN = "ATASFO", DOI = "https://doi.org/10.1145/3315569", ISSN = "1084-4309 (print), 1557-7309 (electronic)", ISSN-L = "1084-4309", bibdate = "Thu Jan 30 09:00:30 MST 2020", bibsource = 
"https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/todaes.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3315569", abstract = "Heterogeneous computing leverages more than one kind of processors to boost the performance of user-space applications with the heterogeneous programming languages, e.g., OpenCL. While some works have been done to accelerate the computations required by \ldots{}", acknowledgement = ack-nhfb, articleno = "30", fjournal = "ACM Transactions on Design Automation of Electronic Systems", journal-URL = "https://dl.acm.org/loi/todaes", } @Article{Utterback:2019:POR, author = "Robert Utterback and Kunal Agrawal and I-Ting Angelina Lee and Milind Kulkarni", title = "Processor-Oblivious Record and Replay", journal = j-TOPC, volume = "6", number = "4", pages = "20:1--20:??", month = dec, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3365659", ISSN = "2329-4949 (print), 2329-4957 (electronic)", ISSN-L = "2329-4949", bibdate = "Fri Dec 27 16:13:12 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/topc.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3365659", abstract = "Record-and-replay systems are useful tools for debugging non-deterministic parallel programs by first recording an execution and then replaying that execution to produce the same access pattern. Existing record-and-replay systems generally target thread-based execution models, and record the behaviors and interleavings of individual threads. Dynamic multithreaded languages and libraries, such as the Cilk family, OpenMP, TBB, and the like, do not have a notion of threads. Instead, these languages provide a processor-oblivious model of programming, where programs expose task parallelism using high-level constructs such as spawn/sync without regard to the number of threads/cores available to run the program. 
Thread-based record-and-replay would violate the processor-oblivious nature of these programs, as they incorporate the number of threads into the recorded information, constraining the replayed execution to the same number of threads. In this article, we present a processor-oblivious record-and-replay scheme for dynamic multithreaded languages where record and replay can use different number of processors and both are scheduled using work stealing. We provide theoretical guarantees for our record and replay scheme---namely that record is optimal for programs with one lock and replay is near-optimal for all cases. In addition, we implemented this scheme in the Cilk Plus runtime system and our evaluation indicates that processor-obliviousness does not cause substantial overheads.", acknowledgement = ack-nhfb, articleno = "20", fjournal = "ACM Transactions on Parallel Computing", journal-URL = "http://dl.acm.org/citation.cfm?id=2632163", } @Article{Valero-Lara:2019:MTS, author = "Pedro Valero-Lara and Ra{\"u}l Sirvent and Antonio J. Pe{\~n}a and Jes{\'u}s Labarta", title = "{MPI + OpenMP} tasking scalability for multi-morphology simulations of the human brain", journal = j-PARALLEL-COMPUTING, volume = "84", number = "??", pages = "50--61", month = may, year = "2019", CODEN = "PACOEJ", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Mon Oct 14 16:20:01 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S016781911830317X", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Vasilache:2019:NAL, author = "Nicolas Vasilache and Oleksandr Zinenko and Theodoros Theodoridis and Priya Goyal and Zachary Devito and William S.
Moses and Sven Verdoolaege and Andrew Adams and Albert Cohen", title = "The Next 700 Accelerated Layers: From Mathematical Expressions of Network Computation Graphs to Accelerated {GPU} Kernels, Automatically", journal = j-TACO, volume = "16", number = "4", pages = "38:1--38:??", month = oct, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3355606", ISSN = "1544-3566 (print), 1544-3973 (electronic)", ISSN-L = "1544-3566", bibdate = "Sat Oct 12 15:31:26 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/taco.bib", abstract = "Deep learning frameworks automate the deployment, distribution, synchronization, memory allocation, and hardware acceleration of models represented as graphs of computational operators. These operators wrap high-performance libraries such as cuDNN or NNPACK. When the computation does not match any predefined library call, custom operators must be implemented, often at high engineering cost and performance penalty, limiting the pace of innovation. To address this productivity gap, we propose and evaluate: (1) a domain-specific language with a tensor notation close to the mathematics of deep learning; (2) a Just-In-Time optimizing compiler based on the polyhedral framework; (3) carefully coordinated linear optimization and evolutionary algorithms to synthesize high-performance CUDA kernels; (4) the transparent integration of our flow into PyTorch and Caffe2, providing the fully automatic synthesis of high-performance GPU kernels from simple tensor algebra. 
The performance is comparable to, and often exceeds the performance of, highly tuned libraries.", acknowledgement = ack-nhfb, articleno = "38", fjournal = "ACM Transactions on Architecture and Code Optimization (TACO)", journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J924", } @Article{Vitali:2019:EOO, author = "Emanuele Vitali and Davide Gadioli and Gianluca Palermo and Andrea Beccari and Carlo Cavazzoni and Cristina Silvano", title = "Exploiting {OpenMP} and {OpenACC} to accelerate a geometric approach to molecular docking in heterogeneous {HPC} nodes", journal = j-J-SUPERCOMPUTING, volume = "75", number = "7", pages = "3374--3396", month = jul, year = "2019", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-019-02875-w", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Thu Oct 10 15:31:20 MDT 2019", bibsource = "http://link.springer.com/journal/11227/75/7; https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Article{Vu:2019:FMT, author = "V. A. Vu and G. 
Tan", title = "A Framework for Mesoscopic Traffic Simulation in {GPU}", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "30", number = "8", pages = "1691--1703", month = aug, year = "2019", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/TPDS.2019.2896636", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Fri Aug 30 06:09:58 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "IEEE Transactions on Parallel and Distributed Systems", journal-URL = "http://www.computer.org/portal/web/csdl/transactions/tpds", keywords = "Computational modeling; Data models; data parallelism; data structures; demand and supply components; GPU; GPU threads; graphics processing units; Graphics processing units; high-performance computing; innovative data structure; Load modeling; Loading; mesoscopic traffic simulation; Microscopy; optimisation; optimization; road traffic; simulation algorithm; simulation flow; traffic engineering computing; traffic management support capabilities; traffic network; Vehicles", } @Article{Waidyasooriya:2019:OBD, author = "Hasitha Muthumala Waidyasooriya and Masanori Hariyama and Masamichi J. 
Miyama and Masayuki Ohzeki", title = "{OpenCL}-based design of an {FPGA} accelerator for quantum annealing simulation", journal = j-J-SUPERCOMPUTING, volume = "75", number = "8", pages = "5019--5039", month = aug, year = "2019", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-019-02778-w", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Thu Oct 10 15:31:21 MDT 2019", bibsource = "http://link.springer.com/journal/11227/75/8; https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Article{Wang:2019:FBA, author = "Haomiao Wang and Prabu Thiagaraj and Oliver Sinnen", title = "{FPGA}-based Acceleration of {FT} Convolution for Pulsar Search Using {OpenCL}", journal = j-TRETS, volume = "11", number = "4", pages = "24:1--24:??", month = jan, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3268933", ISSN = "1936-7406 (print), 1936-7414 (electronic)", ISSN-L = "1936-7406", bibdate = "Sat Oct 19 17:43:01 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/trets.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3268933", abstract = "The Square Kilometre Array (SKA) project will be the world's largest radio telescope array. With its large number of antennas, the number of signals that need to be processed is dramatic. One important element of the SKA's Central Signal Processor package is pulsar search. This article focuses on the FPGA-based acceleration of the Frequency-Domain Acceleration Search module, which is a part of SKA pulsar search engine. In this module, the frequency-domain input signals have to be processed by 85 Finite Impulse response (FIR) filters within a short period of limitation and for thousands of input arrays. 
Because of the large scale of the input length and FIR filter size, even high-end FPGA devices cannot parallelise the task completely. We start by investigating both time-domain FIR filter (TDFIR) and frequency-domain FIR filter (FDFIR) to tackle this task. We applied the overlap-add algorithm to split the coefficient array of TDFIR and the overlap-save algorithm to split the input signals of FDFIR. To achieve fast prototyping design, we employed OpenCL, which is a high-level FPGA development technique. The performance and power consumption are evaluated using multiple FPGA devices simultaneously and compared with GPU results, which is achieved by porting FPGA-based OpenCL kernels. The experimental evaluation shows that the FDFIR solution is very competitive in terms of performance, with a clear energy consumption advantage over the GPU solution.", acknowledgement = ack-nhfb, articleno = "24", fjournal = "ACM Transactions on Reconfigurable Technology and Systems (TRETS)", journal-URL = "http://portal.acm.org/toc.cfm?id=J1151", } @Article{Wang:2019:MEM, author = "L. Wang and M. Jahre and A. Adileh and Z. Wang and L. Eeckhout", title = "Modeling Emerging Memory-Divergent {GPU} Applications", journal = j-IEEE-COMPUT-ARCHIT-LETT, volume = "18", number = "2", pages = "95--98", month = jul, year = "2019", DOI = "https://doi.org/10.1109/LCA.2019.2923618", ISSN = "1556-6056 (print), 1556-6064 (electronic)", ISSN-L = "1556-6056", bibdate = "Tue Oct 1 10:18:16 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeecomputarchitlett.bib; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", abstract = "Analytical performance models yield valuable architectural insight without incurring the excessive runtime overheads of simulation. In this work, we study contemporary GPU applications and find that the key performance-related behavior of such applications is distinct from traditional GPU applications. 
The key issue is that these GPU applications are memory-intensive and have poor spatial locality, which implies that the loads of different threads commonly access different cache blocks. Such memory-divergent applications quickly exhaust the number of misses the L1 cache can process concurrently, and thereby cripple the GPU's ability to use Memory-Level Parallelism (MLP) and Thread-Level Parallelism (TLP) to hide memory latencies. Our Memory Divergence Model (MDM) is able to accurately represent this behavior and thereby reduces average performance prediction error by $ 14 \times $ compared to the state-of-the-art GPUMech approach across our memory-divergent applications.", acknowledgement = ack-nhfb, fjournal = "IEEE Computer Architecture Letters", journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=10208", keywords = "Analytical models; analytical performance models; Analytical performance prediction; average performance prediction error; cache blocks; cache storage; Computational modeling; contemporary GPU applications; GPU; graphics processing units; Graphics processing units; Instruction sets; key performance-related behavior; L1 cache; Mathematical model; memory architecture; memory divergence model; memory latencies; memory-divergent applications; memory-divergent GPU applications; memory-intensive; memory-level parallelism; multi-threading; multiprocessing systems; Predictive models; Random access memory; thread-level parallelism; traditional GPU applications; valuable architectural insight", } @Article{Warren:2019:CBG, author = "Craig Warren and Antonios Giannopoulos and Alan Gray and Iraklis Giannakis and Alan Patterson and Laura Wetter and Andre Hamrah", title = "A {CUDA}-based {GPU} engine for {gprMax}: Open source {FDTD} electromagnetic simulation software", journal = j-COMP-PHYS-COMM, volume = "237", number = "??", pages = "208--218", month = apr, year = "2019", CODEN = "CPHCBZ", DOI = "https://doi.org/10.1016/j.cpc.2018.11.007", 
ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Wed Feb 6 15:16:58 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/gnu.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465518303990", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Wende:2019:OVT, author = "Florian Wende and Martijn Marsman and Jeongnim Kim and Fedor Vasilev and Zhengji Zhao and Thomas Steinke", title = "{OpenMP} in {VASP}: Threading and {SIMD}", journal = j-IJQC, volume = "119", number = "12", pages = "e25851:1--e25851:??", day = "15", month = jun, year = "2019", CODEN = "IJQCB2", DOI = "https://doi.org/10.1002/qua.25851", ISSN = "0020-7608 (print), 1097-461X (electronic)", ISSN-L = "0020-7608", bibdate = "Wed Oct 9 06:14:07 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/ijqc2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "International Journal of Quantum Chemistry", journal-URL = "http://www.interscience.wiley.com/jpages/0020-7608/", onlinedate = "19 December 2018", } @Article{Winkler:2019:GSM, author = "Daniel Winkler and Massoud Rezavand and Michael Meister and Wolfgang Rauch", title = "{gpuSPHASE} --- a shared memory caching implementation for {$2$D} {SPH} using {CUDA} (new version announcement)", journal = j-COMP-PHYS-COMM, volume = "235", number = "??", pages = "514--516", month = feb, year = "2019", CODEN = "CPHCBZ", DOI = "https://doi.org/10.1016/j.cpc.2018.08.016", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Sat Nov 24 07:45:46 MST 2018", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = 
"http://www.sciencedirect.com/science/article/pii/S0010465518303126", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Wozniak:2019:MJW, author = "Justin M. Wozniak and Matthieu Dorier and Robert Ross and Tong Shu and Tahsin Kurc and Li Tang and Norbert Podhorszki and Matthew Wolf", title = "{MPI} jobs within {MPI} jobs: a practical way of enabling task-level fault-tolerance in {HPC} workflows", journal = j-FUT-GEN-COMP-SYS, volume = "101", number = "??", pages = "576--589", month = dec, year = "2019", CODEN = "FGSEVI", DOI = "https://doi.org/10.1016/j.future.2019.05.020", ISSN = "0167-739X (print), 1872-7115 (electronic)", ISSN-L = "0167-739X", bibdate = "Mon Feb 10 12:55:02 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/futgencompsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167739X1830757X", acknowledgement = ack-nhfb, fjournal = "Future Generation Computer Systems", journal-URL = "http://www.sciencedirect.com/science/journal/0167739X", } @Article{Wu:2019:PMG, author = "J. Wu and X. Yang and Z. Zhang and G. Chen and R. 
Mao", title = "A Performance Model for {GPU} Architectures that Considers On-Chip Resources: Application to Medical Image Registration", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "30", number = "9", pages = "1947--1961", month = sep, year = "2019", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/TPDS.2019.2905213", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Fri Aug 30 06:09:58 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "IEEE Transactions on Parallel and Distributed Systems", journal-URL = "http://www.computer.org/portal/web/csdl/transactions/tpds", keywords = "Computational modeling; Computer architecture; CPU; data transfer; Data transfer; GPU architectures; graphics processing unit; graphics processing units; Graphics processing units; graphics processing units; image registration; Image registration; medical image processing; medical image registration; NVIDIA GPUs; on-chip GPU resources; on-chip resources; parallel programming; parallel programs; Performance model; performance model; Predictive models; System-on-chip", } @Article{Yeh:2019:PGR, author = "Tsung Tai Yeh and Amit Sabne and Putt Sakdhnagool and Rudolf Eigenmann and Timothy G. Rogers", title = "{Pagoda}: a {GPU} Runtime System for Narrow Tasks", journal = j-TOPC, volume = "6", number = "4", pages = "21:1--21:??", month = nov, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3365657", ISSN = "2329-4949 (print), 2329-4957 (electronic)", ISSN-L = "2329-4949", bibdate = "Wed Nov 20 07:59:59 MST 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/topc.bib", abstract = "Massively multithreaded GPUs achieve high throughput by running thousands of threads in parallel. 
To fully utilize their hardware, contemporary workloads spawn work to the GPU in bulk by launching large tasks, where each task is a kernel that contains thousands of threads that occupy the entire GPU. GPUs face severe underutilization and their performance benefits vanish if the tasks are narrow, i.e., they contain less than 512 threads. Latency-sensitive applications in network, signal, and image processing that generate a large number of tasks with relatively small inputs are examples of such limited parallelism. This article presents Pagoda, a runtime system that virtualizes GPU resources, using an OS-like daemon kernel called MasterKernel. Tasks are spawned from the CPU onto Pagoda as they become available, and are scheduled by the MasterKernel at the warp granularity. This level of control enables the GPU to keep scheduling and executing tasks as long as free warps are found, dramatically reducing underutilization. Experimental results on real hardware demonstrate that Pagoda achieves a geometric mean speedup of 5.52X over PThreads running on a 20-core CPU, 1.76X over CUDA-HyperQ, and 1.44X over GeMTC, the state-of-the-art runtime GPU task scheduling system.", acknowledgement = ack-nhfb, articleno = "21", fjournal = "ACM Transactions on Parallel Computing", journal-URL = "http://dl.acm.org/citation.cfm?id=2632163", } @Article{Zaitsev:2019:SLD, author = "D. Zaitsev and S. Tomov and J.
Dongarra", title = "Solving Linear {Diophantine} Systems on Parallel Architectures", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "30", number = "5", pages = "1158--1169", month = may, year = "2019", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/TPDS.2018.2873354", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Fri Aug 30 06:09:58 2019", bibsource = "https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "IEEE Transactions on Parallel and Distributed Systems", journal-URL = "http://www.computer.org/portal/web/csdl/transactions/tpds", keywords = "application program interfaces; clan; discrete system modeling; discrete-event systems; distributed memory systems; distributed-memory computing nodes; distributing systems; dynamic task-dispatching subsystem; formal languages; linear Diophantine system; linear Diophantine systems-of-equations; logic programming; Mathematical model; mathematics computing; Matrix decomposition; message passing; model checking; MPI; multiple cores; nonnegative integer numbers; OpenMP; parallel architectures; Parallel architectures; parallel architectures; parallel-sequential composition; Petri net; Petri nets; polynomials; single indecomposable system; Software algorithms; Sparse matrices; sparse matrices; sparse matrix; speed-up; system clans; Task analysis; two-level parallelization concept", } @Article{Adamek:2020:GFC, author = "Karel Ad{\'a}mek and Sofia Dimoudi and Mike Giles and Wesley Armour", title = "{GPU} Fast Convolution via the Overlap-and-Save Method in Shared Memory", journal = j-TACO, volume = "17", number = "3", pages = "18:1--18:20", month = aug, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3394116", ISSN = "1544-3566 (print), 1544-3973 (electronic)", ISSN-L = "1544-3566", bibdate = "Fri Aug 28 12:02:00 MDT 
2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/taco.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://dl.acm.org/doi/10.1145/3394116", abstract = "We present an implementation of the overlap-and-save method, a method for the convolution of very long signals with short response functions, which is tailored to GPUs. We have implemented several FFT algorithms (using the CUDA programming language), \ldots{}", acknowledgement = ack-nhfb, articleno = "18", fjournal = "ACM Transactions on Architecture and Code Optimization (TACO)", journal-URL = "https://dl.acm.org/loi/taco", } @Article{Al-Mouhamed:2020:RCO, author = "Mayez A. Al-Mouhamed and Ayaz H. Khan and Nazeeruddin Mohammad", title = "A review of {CUDA} optimization techniques and tools for structured grid computing", journal = j-COMPUTING, volume = "102", number = "4", pages = "977--1003", month = apr, year = "2020", CODEN = "CMPTA2", DOI = "https://doi.org/10.1007/s00607-019-00744-1", ISSN = "0010-485X (print), 1436-5057 (electronic)", ISSN-L = "0010-485X", bibdate = "Tue May 12 18:02:15 MDT 2020", bibsource = "http://link.springer.com/journal/607/102/4; https://www.math.utah.edu/pub/tex/bib/computing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Computing", journal-URL = "http://link.springer.com/journal/607", } @Article{Allegretti:2020:OBB, author = "S. Allegretti and F. Bolelli and C. 
Grana", title = "Optimized Block-Based Algorithms to Label Connected Components on {GPUs}", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "31", number = "2", pages = "423--438", month = feb, year = "2020", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/TPDS.2019.2934683", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Wed Jan 22 06:09:50 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "IEEE Transactions on Parallel and Distributed Systems", journal-URL = "http://www.computer.org/portal/web/csdl/transactions/tpds", keywords = "connected components labeling; CUDA; GPU; Parallel processing", } @Article{Amos:2020:AQQ, author = "Brandon D. Amos and David R. Easterling and Layne T. Watson and William I. Thacker and Brent S. Castle and Michael W. Trosset", title = "{Algorithm 1007}: {QNSTOP} --- Quasi-{Newton} Algorithm for Stochastic Optimization", journal = j-TOMS, volume = "46", number = "2", pages = "17:1--17:20", month = jun, year = "2020", CODEN = "ACMSCU", DOI = "https://doi.org/10.1145/3374219", ISSN = "0098-3500 (print), 1557-7295 (electronic)", ISSN-L = "0098-3500", bibdate = "Fri Jun 12 07:37:53 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/toms.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3374219", abstract = "QNSTOP consists of serial and parallel (OpenMP) Fortran 2003 codes for the quasi-Newton stochastic optimization method of Castle and Trosset for stochastic search problems. A complete description of QNSTOP for both local search with stochastic objective and global search with ``noisy'' deterministic objective is given here, to the best of our knowledge, for the first time. For stochastic search problems, some convergence theory exists for particular algorithmic choices and parameter values. 
Both the parallel driver subroutine, which offers several parallel decomposition strategies, and the serial driver subroutine can be used for local stochastic search or global deterministic search, based on an input switch. Some performance data for computational systems biology problems is given.", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Transactions on Mathematical Software (TOMS)", journal-URL = "https://dl.acm.org/loi/toms", } @Article{Arabnejad:2020:SSC, author = "Hamid Arabnejad and Jo{\~a}o Bispo and Jorge G. Barbosa", title = "Source-to-source compilation targeting {OpenMP}-based automatic parallelization of {C} applications", journal = j-J-SUPERCOMPUTING, volume = "76", number = "9", pages = "6753--6785", month = sep, year = "2020", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-019-03109-9", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Fri May 14 09:19:58 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://link.springer.com/article/10.1007/s11227-019-03109-9", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", online-date = "Published: 17 December 2019 Pages: 6753 - 6785", } @Article{Awan:2020:CPC, author = "A. A. Awan and A. Jain and C. Chu and H. Subramoni and D. K. 
Panda", title = "Communication Profiling and Characterization of Deep-Learning Workloads on Clusters With High-Performance Interconnects", journal = j-IEEE-MICRO, volume = "40", number = "1", pages = "35--43", month = jan, year = "2020", CODEN = "IEMIDZ", DOI = "https://doi.org/10.1109/MM.2019.2949986", ISSN = "0272-1732 (print), 1937-4143 (electronic)", ISSN-L = "0272-1732", bibdate = "Wed Jan 22 06:22:53 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeemicro.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "IEEE Micro", journal-URL = "http://www.computer.org/csdl/mags/mi/index.html", keywords = "Communication Libraries; Deep learning; Distributed computing; Graphics processing units; Heterogeneous networks; Horovod; InfiniBand; Middleware; MVAPICH2 MPI; NVLink; Omni-Path; PCIe; Performance Analysis; Performance analysis; Profiling; TensorFlow; Training data", } @Article{Baek:2020:ESO, author = "Nakhoon Baek", title = "An emulation scheme for {OpenGL SC 2.0} over {OpenGL}", journal = j-J-SUPERCOMPUTING, volume = "76", number = "10", pages = "7951--7960", month = oct, year = "2020", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-018-2399-1", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Fri May 14 09:19:56 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://link.springer.com/article/10.1007/s11227-018-2399-1", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", online-date = "Published: 02 May 2018 Pages: 7951 - 7960", } @Article{Ballard:2020:TPC, author = "Grey Ballard and Alicia Klinvex and Tamara G. 
Kolda", title = "{TuckerMPI}: a Parallel {C++\slash MPI} Software Package for Large-scale Data Compression via the {Tucker} Tensor Decomposition", journal = j-TOMS, volume = "46", number = "2", pages = "13:1--13:31", month = jun, year = "2020", CODEN = "ACMSCU", DOI = "https://doi.org/10.1145/3378445", ISSN = "0098-3500 (print), 1557-7295 (electronic)", ISSN-L = "0098-3500", bibdate = "Fri Jun 12 07:37:53 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/datacompression.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/toms.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3378445", abstract = "Our goal is compression of massive-scale grid-structured data, such as the multi-terabyte output of a high-fidelity computational simulation. For such data sets, we have developed a new software package called TuckerMPI, a parallel C++/MPI software package for compressing distributed data. The approach is based on treating the data as a tensor, i.e., a multidimensional array, and computing its truncated Tucker decomposition, a higher-order analogue to the truncated singular value decomposition of a matrix. The result is a low-rank approximation of the original tensor-structured data. Compression efficiency is achieved by detecting latent global structure within the data, which we contrast to most compression methods that are focused on local structure. In this work, we describe TuckerMPI, our implementation of the truncated Tucker decomposition, including details of the data distribution and in-memory layouts, the parallel and serial implementations of the key kernels, and analysis of the storage, communication, and computational costs. 
We test the software on 4.5 and 6.7 terabyte data sets distributed across 100s of nodes (1,000s of MPI processes), achieving compression ratios between 100 and 200,000$ \times $, which equates to 99--99.999\% compression (depending on the desired accuracy) in substantially less time than it would take to even read the same dataset from a parallel file system. Moreover, we show that our method also allows for reconstruction of partial or down-sampled data on a single node, without a parallel computer so long as the reconstructed portion is small enough to fit on a single machine, e.g., in the instance of reconstructing/visualizing a single down-sampled time step or computing summary statistics. The code is available at https://gitlab.com/tensors/TuckerMPI.", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Transactions on Mathematical Software (TOMS)", journal-URL = "https://dl.acm.org/loi/toms", } @Article{Barreda:2020:IFC, author = "Mar{\'\i}a Barreda and Jos{\'e} I. Aliaga and Marc Casas", title = "Iteration-fusing conjugate gradient for sparse linear systems with {MPI + OmpSs}", journal = j-J-SUPERCOMPUTING, volume = "76", number = "9", pages = "6669--6689", month = sep, year = "2020", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-019-03100-4", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Fri May 14 09:19:58 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://link.springer.com/article/10.1007/s11227-019-03100-4", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", online-date = "Published: 10 December 2019 Pages: 6669 - 6689", } @Article{Bernholdt:2020:SMU, author = "David E. Bernholdt and Swen Boehm and George Bosilca and Manjunath Gorentla Venkata and Ryan E. Grant and Thomas Naughton and Howard P.
Pritchard and Martin Schulz and Geoffroy R. Vallee", title = "A survey of {MPI} usage in the {US} exascale computing project", journal = j-CCPE, volume = "32", number = "3", pages = "e4851:1--e4851:??", day = "10", month = feb, year = "2020", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.4851", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Wed Mar 31 07:52:13 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, ajournal = "Concurr. Comput.", fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "27 September 2018", } @Article{Bombieri:2020:MIB, author = "N. Bombieri and F. Busato and A. Danese and L. Piccolboni and G. Pravadelli", title = "{Mangrove}: An Inference-Based Dynamic Invariant Mining for {GPU} Architectures", journal = j-IEEE-TRANS-COMPUT, volume = "69", number = "4", pages = "606--620", month = apr, year = "2020", CODEN = "ITCOB4", DOI = "https://doi.org/10.1109/TC.2019.2953846", ISSN = "0018-9340 (print), 1557-9956 (electronic)", ISSN-L = "0018-9340", bibdate = "Thu Mar 12 16:58:27 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranscomput2020.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "IEEE Transactions on Computers", journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=12", keywords = "GPUs; inference; Invariant mining", } @Article{Cabral:2020:EMO, author = "Frederico L. Cabral and Sanderson L. Gonzaga de Oliveira and Carla Osthoff and Gabriel P. Costa and Diego N.
Brand{\~a}o and Mauricio Kischinhevsky", title = "An evaluation of {MPI} and {OpenMP} paradigms in finite-difference explicit methods for {PDEs} on shared-memory multi- and manycore systems", journal = j-CCPE, volume = "32", number = "20", pages = "e5642:1--e5642:??", day = "25", month = oct, year = "2020", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.5642", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Wed Mar 31 07:52:20 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, ajournal = "Concurr. Comput.", fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "29 December 2019", } @Article{Cesarini:2020:CSR, author = "D. Cesarini and A. Bartolini and A. Borghesi and C. Cavazzoni and M. Luisier and L. Benini", title = "Countdown Slack: a Run-Time Library to Reduce Energy Footprint in Large-Scale {MPI} Applications", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "31", number = "11", pages = "2696--2709", month = nov, year = "2020", CODEN = "ITDSEO", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Sat Aug 15 14:52:38 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "IEEE Transactions on Parallel and Distributed Systems", journal-URL = "https://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=71", } @Article{Chakraborty:2020:ESE, author = "Sourav Chakraborty and Ignacio Laguna and Murali Emani and Kathryn Mohror and Dhabaleswar K.
Panda and Martin Schulz and Hari Subramoni", title = "{EReinit}: Scalable and efficient fault-tolerance for bulk-synchronous {MPI} applications", journal = j-CCPE, volume = "32", number = "3", pages = "e4863:1--e4863:??", day = "10", month = feb, year = "2020", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.4863", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Wed Mar 31 07:52:13 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, ajournal = "Concurr. Comput.", fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "14 August 2018", } @Article{Chang:2020:ADI, author = "Tyler H. Chang and Layne T. Watson and Thomas C. H. Lux and Ali R. Butt and Kirk W. Cameron and Yili Hong", title = "{Algorithm 1012}: {DELAUNAYSPARSE}: Interpolation via a Sparse Subset of the {Delaunay} Triangulation in Medium to High Dimensions", journal = j-TOMS, volume = "46", number = "4", pages = "38:1--38:20", month = nov, year = "2020", CODEN = "ACMSCU", DOI = "https://doi.org/10.1145/3422818", ISSN = "0098-3500 (print), 1557-7295 (electronic)", ISSN-L = "0098-3500", bibdate = "Sat Nov 14 07:15:52 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/toms.bib", URL = "https://dl.acm.org/doi/10.1145/3422818", abstract = "DELAUNAYSPARSE contains both serial and parallel codes written in Fortran 2003 (with OpenMP) for performing medium- to high-dimensional interpolation via the Delaunay triangulation. To accommodate the exponential growth in the size of the Delaunay \ldots{}", acknowledgement = ack-nhfb, articleno = "38", fjournal = "ACM Transactions on Mathematical Software (TOMS)", journal-URL = "https://dl.acm.org/loi/toms", } @Article{Cho:2020:PMP, author = "Y. Cho and S. Oh and B.
Egger", title = "Performance Modeling of Parallel Loops on Multi-Socket Platforms Using Queueing Systems", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "31", number = "2", pages = "318--331", month = feb, year = "2020", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/TPDS.2019.2938172", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Wed Jan 22 06:09:50 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "IEEE Transactions on Parallel and Distributed Systems", journal-URL = "http://www.computer.org/portal/web/csdl/transactions/tpds", keywords = "Computational modeling; Dynamic scheduling; multi-socket system; Multicore processing; NUMA; OpenMP; parallel loop; Performance modeling; Predictive models; queueing system; Servers; Time factors", } @Article{Daleiden:2020:GPP, author = "Patrick Daleiden and Andreas Stefik and Philip Merlin Uesbeck", title = "{GPU} Programming Productivity in Different Abstraction Paradigms: a Randomized Controlled Trial Comparing {CUDA} and Thrust", journal = j-TOCE, volume = "20", number = "4", pages = "27:1--27:27", month = nov, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3418301", ISSN = "1946-6226", ISSN-L = "1946-6226", bibdate = "Sat Mar 20 18:20:46 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/toce.bib", URL = "https://dl.acm.org/doi/10.1145/3418301", abstract = "Coprocessor architectures in High Performance Computing are prevalent in today's scientific computing clusters and require specialized knowledge for proper utilization. 
Various alternative paradigms for parallel and offload computation exist, but little \ldots{}", acknowledgement = ack-nhfb, articleno = "27", fjournal = "ACM Transactions on Computing Education", journal-URL = "https://dl.acm.org/loi/toce", } @Article{Davydov:2020:ADS, author = "Denis Davydov and Martin Kronbichler", title = "Algorithms and Data Structures for Matrix-Free Finite Element Operators with {MPI}-Parallel Sparse Multi-Vectors", journal = j-TOPC, volume = "7", number = "3", pages = "20:1--20:30", month = aug, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3399736", ISSN = "2329-4949 (print), 2329-4957 (electronic)", ISSN-L = "2329-4949", bibdate = "Thu Aug 6 08:56:07 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/topc.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3399736", abstract = "Traditional solution approaches for problems in quantum mechanics scale as $ O(M^3) $, where $M$ is the number of electrons. Various methods have been proposed to address this issue and obtain a linear scaling $ O(M)$. One promising formulation is the direct minimization of energy. Such methods take advantage of physical localization of the solution, allowing users to seek it in terms of non-orthogonal orbitals with local support.\par This work proposes a numerically efficient implementation of sparse parallel vectors within the open-source finite element library deal.II. The main algorithmic ingredient is the matrix-free evaluation of the Hamiltonian operator by cell-wise quadrature. Based on an a-priori chosen support for each vector, we develop algorithms and data structures to perform (i) matrix-free sparse matrix multivector products (SpMM), (ii) the projection of an operator onto a sparse sub-space (inner products), and (iii) post-multiplication of a sparse multivector with a square matrix. The node-level performance is analyzed using a roofline model. 
Our matrix-free implementation of finite element operators with sparse multivectors achieves a performance of 157 GFlop/s on an Intel Cascade Lake processor with 20 cores. Strong and weak scaling results are reported for a representative benchmark problem using quadratic and quartic finite element bases.", acknowledgement = ack-nhfb, articleno = "20", fjournal = "ACM Transactions on Parallel Computing", journal-URL = "https://dl.acm.org/loi/topc", } @Article{Deng:2020:CCB, author = "Y. Deng and T. Li and Y. Luo and X. Zhao", title = "Corrections to {``CUDA-Based Volume Rendering and Inspection for Time-Varying Ultrasonic Testing Datasets''}", journal = j-COMPUT-SCI-ENG, volume = "22", number = "1", pages = "4--4", month = jan # "\slash " # feb, year = "2020", CODEN = "CSENFA", DOI = "https://doi.org/10.1109/MCSE.2019.2948481", ISSN = "1521-9615 (print), 1558-366X (electronic)", ISSN-L = "1521-9615", bibdate = "Thu Mar 05 14:46:04 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/computscieng.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "See \cite{Deng:2019:CBV}.", acknowledgement = ack-nhfb, fjournal = "Computing in Science and Engineering", journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5992", keywords = "Acoustics; Biographies; Inspection; Rendering (computer graphics); Testing", } @Article{Diener:2020:HCO, author = "Matthias Diener and Laxmikant V. Kale and Daniel J. Bodony", title = "Heterogeneous computing with {OpenMP} and {Hydra}", journal = j-CCPE, volume = "32", number = "20", pages = "e5728:1--e5728:??", day = "25", month = oct, year = "2020", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.5728", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Wed Mar 31 07:52:20 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, ajournal = "Concurr. 
Comput.", fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "07 March 2020", } @Article{Eichenberger:2020:HCG, author = "A. E. Eichenberger and G.-T. Bercea and A. Bataev and L. Grinberg and J. K. O'Brien", title = "Hybrid {CPU\slash GPU} tasks optimized for concurrency in {OpenMP}", journal = j-IBM-JRD, volume = "64", number = "3/4", pages = "13:1--13:14", month = may # "\slash " # jul, year = "2020", CODEN = "IBMJAE", DOI = "https://doi.org/10.1147/JRD.2019.2960245", ISSN = "0018-8646 (print), 2151-8556 (electronic)", ISSN-L = "0018-8646", bibdate = "Wed Jun 3 18:35:26 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/ibmjrd.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/super.bib", acknowledgement = ack-nhfb, fjournal = "IBM Journal of Research and Development", journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5288520", } @Article{Eichstadt:2020:CSM, author = "Jan Eichst{\"a}dt and Martin Vymazal and David Moxey and Joaquim Peir{\'o}", title = "A comparison of the shared-memory parallel programming models {{\em OpenMP}}, {{\em OpenACC}} and {{\em Kokkos}} in the context of implicit solvers for high-order {FEM}", journal = j-COMP-PHYS-COMM, volume = "255", number = "??", pages = "Article 107245", month = oct, year = "2020", CODEN = "CPHCBZ", DOI = "https://doi.org/10.1016/j.cpc.2020.107245", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Fri Jun 19 07:19:50 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2020.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465520300746", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Elis:2020:QNG, author = "Bengisu Elis and 
Dai Yang and Olga Pearce and Kathryn Mohror and Martin Schulz", title = "{QMPI}: a next generation {MPI} profiling interface for modern {HPC} platforms", journal = j-PARALLEL-COMPUTING, volume = "96", number = "??", pages = "Article 102635", month = aug, year = "2020", CODEN = "PACOEJ", DOI = "https://doi.org/10.1016/j.parco.2020.102635", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Mon Mar 29 11:36:01 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819120300284", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Fan:2020:ALC, author = "Q. Fan and D. J. Lilja and S. S. Sapatnekar", title = "Adaptive-Length Coding of Image Data for Low-Cost Approximate Storage", journal = j-IEEE-TRANS-COMPUT, volume = "69", number = "2", pages = "239--252", month = feb, year = "2020", CODEN = "ITCOB4", DOI = "https://doi.org/10.1109/TC.2019.2946795", ISSN = "0018-9340 (print), 1557-9956 (electronic)", ISSN-L = "0018-9340", bibdate = "Wed Jan 22 06:44:09 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/datacompression.bib; https://www.math.utah.edu/pub/tex/bib/ieeetranscomput2020.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "IEEE Transactions on Computers", journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=12", keywords = "Adaptive-length coding; approximate storage; Discrete cosine transforms; error-resilience; Huffman coding; Image coding; Reliability; Resilience; Transform coding", } @Article{Ferreira:2020:HMM, author = "Kurt Ferreira and Ryan E. Grant and Michael J. 
Levenhagen and Scott Levy and Taylor Groves", title = "Hardware {MPI} message matching: Insights into {MPI} matching behavior to inform design", journal = j-CCPE, volume = "32", number = "3", pages = "e5150:1--e5150:??", day = "10", month = feb, year = "2020", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.5150", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Wed Mar 31 07:52:13 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, ajournal = "Concurr. Comput.", fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "27 February 2019", } @Article{Gao:2020:MES, author = "T. Gao and Y. Guo and B. Zhang and P. Cicotti and Y. Lu and P. Balaji and M. Taufer", title = "Memory-Efficient and Skew-Tolerant {MapReduce} Over {MPI} for Supercomputing Systems", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "31", number = "12", pages = "2734--2748", year = "2020", CODEN = "ITDSEO", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Sat Aug 15 14:52:38 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/super.bib", acknowledgement = ack-nhfb, fjournal = "IEEE Transactions on Parallel and Distributed Systems", journal-URL = "https://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=71", } @Article{Gawande:2020:SDL, author = "Nitin A. Gawande and Jeff A. Daily and Charles Siegel and Nathan R. 
Tallent and Abhinav Vishnu", title = "Scaling Deep Learning workloads: {NVIDIA DGX-1\slash Pascal} and {Intel Knights Landing}", journal = j-FUT-GEN-COMP-SYS, volume = "108", number = "??", pages = "1162--1172", month = jul, year = "2020", CODEN = "FGSEVI", DOI = "https://doi.org/10.1016/j.future.2018.04.073", ISSN = "0167-739X (print), 1872-7115 (electronic)", ISSN-L = "0167-739X", bibdate = "Fri Jun 19 07:44:16 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/futgencompsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167739X17318599", acknowledgement = ack-nhfb, fjournal = "Future Generation Computer Systems", journal-URL = "http://www.sciencedirect.com/science/journal/0167739X", } @Article{Ghazimirsaeed:2020:CAM, author = "S. Mahdieh Ghazimirsaeed and Seyed H. Mirsadeghi and Ahmad Afsahi", title = "Communication-aware message matching in {MPI}", journal = j-CCPE, volume = "32", number = "3", pages = "e4862:1--e4862:??", day = "10", month = feb, year = "2020", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.4862", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Wed Mar 31 07:52:13 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, ajournal = "Concurr. Comput.", fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "21 September 2018", } @Article{Gonzalez-Dominguez:2020:CJA, author = "Jorge Gonz{\'a}lez-Dom{\'\i}nguez and Roberto R. 
Exp{\'o}sito and Ver{\'o}nica Bol{\'o}n-Canedo", title = "{CUDA-JMI}: Acceleration of feature selection on heterogeneous systems", journal = j-FUT-GEN-COMP-SYS, volume = "102", number = "??", pages = "426--436", month = jan, year = "2020", CODEN = "FGSEVI", DOI = "https://doi.org/10.1016/j.future.2019.08.031", ISSN = "0167-739X (print), 1872-7115 (electronic)", ISSN-L = "0167-739X", bibdate = "Mon Feb 10 12:55:04 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/futgencompsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167739X19312968", acknowledgement = ack-nhfb, fjournal = "Future Generation Computer Systems", journal-URL = "http://www.sciencedirect.com/science/journal/0167739X", } @Article{Gutierrez:2020:MAP, author = "Samuel K. Guti{\'e}rrez and Dorian C. Arnold and Kei Davis and Patrick McCormick", title = "On the memory attribution problem: a solution and case study using {MPI}", journal = j-CCPE, volume = "32", number = "3", pages = "e5159:1--e5159:??", day = "10", month = feb, year = "2020", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.5159", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Wed Mar 31 07:52:13 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, ajournal = "Concurr. 
Comput.", fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "04 February 2019", } @Article{Hagedorn:2020:AHP, author = "Bastian Hagedorn and Johannes Lenfers and Thomas K{\oe}hler and Xueying Qin and Sergei Gorlatch and Michel Steuwer", title = "Achieving high-performance the functional way: a functional pearl on expressing high-performance optimizations as rewrite strategies", journal = j-PACMPL, volume = "4", number = "ICFP", pages = "92:1--92:29", month = aug, year = "2020", DOI = "https://doi.org/10.1145/3408974", bibdate = "Tue Mar 30 08:10:48 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/pacmpl.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://dl.acm.org/doi/10.1145/3408974", abstract = "Optimizing programs to run efficiently on modern parallel hardware is hard but crucial for many applications. The predominantly used imperative languages --- like C or OpenCL --- force the programmer to intertwine the code describing functionality and \ldots{}", acknowledgement = ack-nhfb, articleno = "92", fjournal = "Proceedings of the ACM on Programming Languages", journal-URL = "https://pacmpl.acm.org/", } @Article{Hashmi:2020:FXZ, author = "Jahanzeb Maqbool Hashmi and Ching-Hsiang Chu and Sourav Chakraborty and Mohammadreza Bayatpour and Hari Subramoni and Dhabaleswar K. 
Panda", title = "{FALCON-X}: Zero-copy {MPI} derived datatype processing on modern {CPU} and {GPU} architectures", journal = j-J-PAR-DIST-COMP, volume = "144", number = "??", pages = "1--13", month = oct, year = "2020", CODEN = "JPDCER", DOI = "https://doi.org/10.1016/j.jpdc.2020.05.008", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Wed May 26 16:11:02 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0743731520302872", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Article{He:2020:SMO, author = "Feng He and Xiaoshe Dong and Nianjun Zou and Weiguo Wu and Xingjun Zhang", title = "Structured mesh-oriented framework design and optimization for a coarse-grained parallel {CFD} solver based on hybrid {MPI\slash OpenMP} programming", journal = j-J-SUPERCOMPUTING, volume = "76", number = "4", pages = "2815--2841", month = apr, year = "2020", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-019-03063-6", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Sat Jul 25 07:17:55 MDT 2020", bibsource = "http://link.springer.com/journal/11227/76/4; https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Article{Huang:2020:POL, author = "Ming Hsiang Huang and Wuu Yang", title = "{PFACC}: an {OpenACC}-like programming model for irregular nested parallelism", journal = j-SPE, volume = "50", number = "10", pages = "1877--1904", month = oct, year = "2020", CODEN = "SPEXBL", DOI = "https://doi.org/10.1002/spe.2868", ISSN = "0038-0644 (print), 1097-024X (electronic)", ISSN-L = "0038-0644", 
bibdate = "Fri Feb 26 08:59:23 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/spe.bib", acknowledgement = ack-nhfb, ajournal = "Softw. Pract. Exp.", fjournal = "Software --- Practice and Experience", journal-URL = "http://onlinelibrary.wiley.com/journal/10.1002/(ISSN)1097-024X", onlinedate = "09 July 2020", } @Article{Jaksic:2020:HPF, author = "Zoran Jak{\v{s}}i{\'c} and Nicola Cadenelli and David Buchaca Prats and Jord{\`a} Polo and Josep Llu{\'{\i}}s Berral Garcia and David Carrera Perez", title = "A highly parameterizable framework for Conditional Restricted {Boltzmann} Machine based workloads accelerated with {FPGAs} and {OpenCL}", journal = j-FUT-GEN-COMP-SYS, volume = "104", number = "??", pages = "201--211", month = mar, year = "2020", CODEN = "FGSEVI", DOI = "https://doi.org/10.1016/j.future.2019.10.025", ISSN = "0167-739X (print), 1872-7115 (electronic)", ISSN-L = "0167-739X", bibdate = "Mon Feb 10 12:55:06 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/futgencompsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167739X19313676", acknowledgement = ack-nhfb, fjournal = "Future Generation Computer Systems", journal-URL = "http://www.sciencedirect.com/science/journal/0167739X", } @Article{Kang:2020:IMC, author = "Q. Kang and S. Lee and K. Hou and R. Ross and A. Agrawal and A. Choudhary and W. 
Liao", title = "Improving {MPI} Collective {I/O} for High Volume Non-Contiguous Requests With Intra-Node Aggregation", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "31", number = "11", pages = "2682--2695", year = "2020", CODEN = "ITDSEO", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Sat Aug 15 14:52:38 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "IEEE Transactions on Parallel and Distributed Systems", journal-URL = "https://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=71", } @Article{Kirkham:2020:FEM, author = "Jake Kirkham and Tyler Sorensen and Esin Tureci and Margaret Martonosi", title = "Foundations of empirical memory consistency testing", journal = j-PACMPL, volume = "4", number = "OOPSLA", pages = "226:1--226:29", month = nov, year = "2020", DOI = "https://doi.org/10.1145/3428294", bibdate = "Tue Mar 30 08:10:50 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/pacmpl.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://dl.acm.org/doi/10.1145/3428294", abstract = "Modern memory consistency models are complex, and it is difficult to reason about the relaxed behaviors that current systems allow. Programming languages, such as C and OpenCL, offer a memory model interface that developers can use to safely write \ldots{}", acknowledgement = ack-nhfb, articleno = "226", fjournal = "Proceedings of the ACM on Programming Languages", journal-URL = "https://pacmpl.acm.org/", } @Article{Klinkenberg:2020:CRL, author = "Jannis Klinkenberg and Philipp Samfass and Michael Bader and Christian Terboven and Matthias S. 
M{\"u}ller", title = "{CHAMELEON}: Reactive Load Balancing for Hybrid {MPI + OpenMP} Task-Parallel Applications", journal = j-J-PAR-DIST-COMP, volume = "138", number = "??", pages = "55--64", month = apr, year = "2020", CODEN = "JPDCER", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Wed Mar 18 09:26:11 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0743731519305180", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Article{Korch:2020:ILE, author = "Matthias Korch and Tim Werner", title = "Improving locality of explicit one-step methods on {GPUs} by tiling across stages and time steps", journal = j-FUT-GEN-COMP-SYS, volume = "102", number = "??", pages = "889--901", month = jan, year = "2020", CODEN = "FGSEVI", DOI = "https://doi.org/10.1016/j.future.2019.07.075", ISSN = "0167-739X (print), 1872-7115 (electronic)", ISSN-L = "0167-739X", bibdate = "Mon Feb 10 12:55:04 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/futgencompsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167739X19307186", acknowledgement = ack-nhfb, fjournal = "Future Generation Computer Systems", journal-URL = "http://www.sciencedirect.com/science/journal/0167739X", } @Article{Larrea:2020:EPM, author = "Ver{\'o}nica G. Vergara Larrea and Reuben D. 
Budiardja and Rahulkumar Gayatri and Christopher Daley and Oscar Hernandez and Wayne Joubert", title = "Experiences in porting mini-applications to {OpenACC} and {OpenMP} on heterogeneous systems", journal = j-CCPE, volume = "32", number = "20", pages = "e5780:1--e5780:??", day = "25", month = oct, year = "2020", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.5780", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Wed Mar 31 07:52:20 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, ajournal = "Concurr. Comput.", fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "24 April 2020", } @Article{Levy:2020:UVA, author = "Scott Levy and Kurt B. Ferreira and Patrick Widener", title = "The unexpected virtue of almost: Exploiting {MPI} collective operations to approximately coordinate checkpoints", journal = j-CCPE, volume = "32", number = "3", pages = "e4890:1--e4890:??", day = "10", month = feb, year = "2020", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.4890", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Wed Mar 31 07:52:13 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, ajournal = "Concurr. Comput.", fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "09 September 2018", } @Article{Li:2020:OOS, author = "Ting Li and Lawrence V. 
Stanislawski and Tyler Brockmeyer and Shaowen Wang and Ethan Shavers", title = "\pkg{OpenCLC}: an open-source software tool for similarity assessment of linear hydrographic features", journal = j-SOFTWAREX, volume = "11", number = "??", pages = "Article 100401", month = jan # "\slash " # jun, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1016/j.softx.2020.100401", ISSN = "2352-7110", ISSN-L = "2352-7110", bibdate = "Fri Apr 9 16:04:39 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/softwarex.bib", URL = "http://www.sciencedirect.com/science/article/pii/S2352711018302747", acknowledgement = ack-nhfb, fjournal = "SoftwareX", journal-URL = "https://www.sciencedirect.com/journal/softwarex/issues", } @Article{Li:2020:SLF, author = "Qinbo Li and Nima Khademi Kalantari", title = "Synthesizing light field from a single image with variable {MPI} and two network fusion", journal = j-TOG, volume = "39", number = "6", pages = "229:1--229:10", month = nov, year = "2020", CODEN = "ATGRDF", DOI = "https://doi.org/10.1145/3414685.3417785", ISSN = "0730-0301 (print), 1557-7368 (electronic)", ISSN-L = "0730-0301", bibdate = "Sun Mar 28 08:21:45 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/tog.bib", URL = "https://dl.acm.org/doi/10.1145/3414685.3417785", abstract = "We propose a learning-based approach to synthesize a light field with a small baseline from a single image. 
We synthesize the novel view images by first using a convolutional neural network (CNN) to promote the input image into a layered representation \ldots{}", acknowledgement = ack-nhfb, articleno = "229", fjournal = "ACM Transactions on Graphics", journal-URL = "https://dl.acm.org/loi/tog", } @Article{Liang:2020:AMD, author = "Jianguo Liang and Rong Hua and Hao Zhang and Wenqiang Zhu and You Fu", title = "Accelerated molecular dynamics simulation of Silicon Crystals on {TaihuLight} using {OpenACC}", journal = j-PARALLEL-COMPUTING, volume = "99", number = "??", pages = "Article 102667", month = nov, year = "2020", CODEN = "PACOEJ", DOI = "https://doi.org/10.1016/j.parco.2020.102667", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Mon Mar 29 11:36:02 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819120300600", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Liao:2020:DCS, author = "Xiaofei Liao and Long Zheng and Binsheng Zhang and Yu Zhang and Hai Jin and Xuanhua Shi and Yi Lin", title = "Dynamic cluster strategy for hierarchical rollback-recovery protocols in {MPI} {HPC} applications", journal = j-CCPE, volume = "32", number = "3", pages = "e4173:1--e4173:??", day = "10", month = feb, year = "2020", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.4173", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Wed Mar 31 07:52:13 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, ajournal = "Concurr. 
Comput.", fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "24 May 2017", } @Article{Lin:2020:EAM, author = "Bo Lin and Chijie Zhuang and Zhenning Cai and Rong Zeng and Weizhu Bao", title = "An efficient and accurate {MPI}-based parallel simulator for streamer discharges in three dimensions", journal = j-J-COMPUT-PHYS, volume = "401", number = "??", pages = "Article 109026", day = "15", month = jan, year = "2020", CODEN = "JCTPAH", ISSN = "0021-9991 (print), 1090-2716 (electronic)", ISSN-L = "0021-9991", bibdate = "Mon Mar 9 18:28:21 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jcomputphys2020.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0021999119307326", acknowledgement = ack-nhfb, fjournal = "Journal of Computational Physics", journal-URL = "http://www.sciencedirect.com/science/journal/00219991", } @Article{Lin:2020:GTD, author = "Huanxin Lin and Cho-Li Wang", title = "On-{GPU} thread-data remapping for nested branch divergence", journal = j-J-PAR-DIST-COMP, volume = "139", number = "??", pages = "75--86", month = may, year = "2020", CODEN = "JPDCER", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Wed Mar 18 09:26:12 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0743731518308967", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Article{Losada:2020:FTM, author = "Nuria Losada and Patricia Gonz{\'a}lez and Mar{\'{\i}}a J. 
Mart{\'{\i}}n and George Bosilca and Aur{\'e}lien Bouteiller and Keita Teranishi", title = "Fault tolerance of {MPI} applications in exascale systems: the {ULFM} solution", journal = j-FUT-GEN-COMP-SYS, volume = "106", number = "??", pages = "467--481", month = may, year = "2020", CODEN = "FGSEVI", DOI = "https://doi.org/10.1016/j.future.2020.01.026", ISSN = "0167-739X (print), 1872-7115 (electronic)", ISSN-L = "0167-739X", bibdate = "Fri Jun 19 07:44:13 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/futgencompsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167739X1930860X", acknowledgement = ack-nhfb, fjournal = "Future Generation Computer Systems", journal-URL = "http://www.sciencedirect.com/science/journal/0167739X", } @Article{Lu:2020:GQO, author = "Q. Lu and J. Yao and H. Guan and P. Gao", title = "{gQoS}: a {QoS}-Oriented {GPU} Virtualization with Adaptive Capacity Sharing", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "31", number = "4", pages = "843--855", month = apr, year = "2020", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/TPDS.2019.2948753", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Wed Jan 22 06:09:50 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", acknowledgement = ack-nhfb, fjournal = "IEEE Transactions on Parallel and Distributed Systems", journal-URL = "https://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=71", keywords = "Cloud computing; cloud computing; GPU virtualization; Graphics processing units; Hardware; QoS control; Quality of service; Resource management; resource scheduling; Virtual machining; Virtualization", } @Article{Mantas:2020:HOC, author = "Jos{\'e} M. 
Mantas and Francesco Vecil", title = "Hybrid {OpenMP--CUDA} parallel implementation of a deterministic solver for ultrashort {DG-MOSFETs}", journal = j-IJHPCA, volume = "34", number = "1", pages = "81--102", day = "1", month = jan, year = "2020", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342019879985", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Sat Jul 25 09:38:31 MDT 2020", bibsource = "http://hpc.sagepub.com/; https://www.math.utah.edu/pub/tex/bib/ijsa.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://journals.sagepub.com/doi/full/10.1177/1094342019879985", acknowledgement = ack-nhfb, fjournal = "International Journal of High Performance Computing Applications", } @Article{Mena:2020:GAS, author = "Hermann Mena and Lena-Maria Pfurtscheller and Tony Stillfjord", title = "{GPU} acceleration of splitting schemes applied to differential matrix equations", journal = j-NUMER-ALGORITHMS, volume = "83", number = "1", pages = "395--419", month = jan, year = "2020", CODEN = "NUALEG", DOI = "https://doi.org/10.1007/s11075-019-00687-w", ISSN = "1017-1398 (print), 1572-9265 (electronic)", ISSN-L = "1017-1398", bibdate = "Wed Jan 22 08:40:22 MST 2020", bibsource = "http://link.springer.com/journal/11075/83/1; https://www.math.utah.edu/pub/tex/bib/numeralgorithms.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://link.springer.com/content/pdf/10.1007/s11075-019-00687-w.pdf", acknowledgement = ack-nhfb, fjournal = "Numerical Algorithms", journal-URL = "http://link.springer.com/journal/11075", } @Article{Mofrad:2020:GNA, author = "Mohammad Hasanzadeh Mofrad and Rami Melhem and Yousuf Ahmad and Mohammad Hammoud", title = "{Graphite}: a {NUMA}-aware {HPC} system for graph analytics based on a new {MPI * X} parallelism model", journal = j-PROC-VLDB-ENDOWMENT, volume = "13", number = "6", pages = "783--797", month = feb, year = "2020", CODEN = "????", DOI = 
"https://doi.org/10.14778/3380750.3380751", ISSN = "2150-8097", bibdate = "Thu Apr 2 10:51:28 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/vldbe.bib", URL = "https://dl.acm.org/doi/abs/10.14778/3380750.3380751", abstract = "In this paper, we propose a new parallelism model denoted as MPI * X and suggest a linear algebra-based graph analytics system, namely, Graphite, which effectively employs it. MPI * X promotes thread-based partitioning to distribute computation and \ldots{}", acknowledgement = ack-nhfb, fjournal = "Proceedings of the VLDB Endowment", journal-URL = "https://dl.acm.org/loi/pvldb", } @Article{Mu:2020:OOB, author = "Jiandong Mu and Wei Zhang and Hao Liang and Sharad Sinha", title = "Optimizing {OpenCL}-Based {CNN} Design on {FPGA} with Comprehensive Design Space Exploration and Collaborative Performance Modeling", journal = j-TRETS, volume = "13", number = "3", pages = "13:1--13:28", month = sep, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3397514", ISSN = "1936-7406 (print), 1936-7414 (electronic)", ISSN-L = "1936-7406", bibdate = "Sat Sep 5 18:51:36 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/trets.bib", URL = "https://dl.acm.org/doi/10.1145/3397514", abstract = "Recent success in applying convolutional neural networks (CNNs) to object detection and classification has sparked great interest in accelerating CNNs using hardware-like field-programmable gate arrays (FPGAs). However, finding an efficient FPGA design \ldots{}", acknowledgement = ack-nhfb, articleno = "13", fjournal = "ACM Transactions on Reconfigurable Technology and Systems (TRETS)", journal-URL = "https://dl.acm.org/loi/trets", } @Article{Nandal:2020:NSG, author = "P. Nandal and R. P. 
Sharma", title = "Numerical simulation on {GPUs} with {CUDA} to study nonlinear dynamics of whistler wave and its turbulent spectrum in radiation belts", journal = j-COMP-PHYS-COMM, volume = "254", number = "??", pages = "Article 107214", month = sep, year = "2020", CODEN = "CPHCBZ", DOI = "https://doi.org/10.1016/j.cpc.2020.107214", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Fri Jun 19 07:19:49 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2020.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465520300497", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Naranjo:2020:ASC, author = "Diana M. Naranjo and Sebasti{\'a}n Risco and Carlos de Alfonso and Alfonso P{\'e}rez and Ignacio Blanquer and Germ{\'a}n Molt{\'o}", title = "Accelerated serverless computing based on {GPU} virtualization", journal = j-J-PAR-DIST-COMP, volume = "139", number = "??", pages = "32--42", month = may, year = "2020", CODEN = "JPDCER", DOI = "https://doi.org/10.1016/j.jpdc.2020.01.004", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Wed Mar 18 09:26:12 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0743731519303533", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Article{Petrovic:2020:BSH, author = "Filip Petrovi{\v{c}} and David St{\v{r}}el{\'a}k and Jana Hozzov{\'a} and Jaroslav Ol'ha and Richard Trembeck{\'y} and Siegfried Benkner and Ji{\v{r}}{\'{\i}} Filipovi{\v{c}}", title = "A benchmark set of 
highly-efficient {CUDA} and {OpenCL} kernels and its dynamic autotuning with {Kernel Tuning Toolkit}", journal = j-FUT-GEN-COMP-SYS, volume = "108", number = "??", pages = "161--177", month = jul, year = "2020", CODEN = "FGSEVI", DOI = "https://doi.org/10.1016/j.future.2020.02.069", ISSN = "0167-739X (print), 1872-7115 (electronic)", ISSN-L = "0167-739X", bibdate = "Fri Jun 19 07:44:16 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/futgencompsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167739X19327360", acknowledgement = ack-nhfb, fjournal = "Future Generation Computer Systems", journal-URL = "http://www.sciencedirect.com/science/journal/0167739X", } @Article{Prades:2020:MRU, author = "Javier Prades and Baldomero Imbern{\'o}n and Carlos Rea{\~n}o and Jorge Pe{\~n}a-Garc{\'\i}a and Jose Pedro Cer{\'o}n-Carrasco and Federico Silla and Horacio P{\'e}rez-S{\'a}nchez", title = "Maximizing resource usage in multifold molecular dynamics with {rCUDA}", journal = j-IJHPCA, volume = "34", number = "1", pages = "5--19", day = "1", month = jan, year = "2020", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342019857131", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Sat Jul 25 09:38:31 MDT 2020", bibsource = "http://hpc.sagepub.com/; https://www.math.utah.edu/pub/tex/bib/ijsa.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://journals.sagepub.com/doi/full/10.1177/1094342019857131", acknowledgement = ack-nhfb, fjournal = "International Journal of High Performance Computing Applications", } @Article{Rasch:2020:DHL, author = "Ari Rasch and Julian Bigge and Martin Wrodarczyk and Richard Schulze and Sergei Gorlatch", title = "{dOCAL}: high-level distributed programming with {OpenCL} and {CUDA}", journal = j-J-SUPERCOMPUTING, volume = "76", number = "7", pages = "5117--5138", month = jul, year = "2020", CODEN = "JOSUED", DOI = 
"https://doi.org/10.1007/s11227-019-02829-2", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Sat Jul 25 07:17:59 MDT 2020", bibsource = "http://link.springer.com/journal/11227/76/7; https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Article{Reis:2020:CMC, author = "Lu{\'\i}s Reis and Jo{\~a}o Bispo and Jo{\~a}o M. P. Cardoso", title = "Compilation of {MATLAB} computations to {CPU\slash GPU} via {C\slash OpenCL} generation", journal = j-CCPE, volume = "32", number = "22", pages = "e5854:1--e5854:??", day = "25", month = nov, year = "2020", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.5854", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Wed Mar 31 07:52:22 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/matlab.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, ajournal = "Concurr. Comput.", fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "01 June 2020", } @Article{Renaud:2020:IMS, author = "Nicolas Renaud and Yong Jung and Vasant Honavar and Cunliang Geng and Alexandre M. J. J. Bonvin and Li C. 
Xue", title = "\pkg{iScore}: an {MPI} supported software for ranking protein-protein docking models based on a random walk graph kernel and support vector machines", journal = j-SOFTWAREX, volume = "11", number = "??", pages = "Article 100462", month = jan # "\slash " # jun, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1016/j.softx.2020.100462", ISSN = "2352-7110", ISSN-L = "2352-7110", bibdate = "Fri Apr 9 16:04:39 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/softwarex.bib", URL = "http://www.sciencedirect.com/science/article/pii/S2352711019303061", acknowledgement = ack-nhfb, fjournal = "SoftwareX", journal-URL = "https://www.sciencedirect.com/journal/softwarex/issues", } @Article{Russek:2020:SLC, author = "Pawe{\l} Russek and Ernest Jamro and Agnieszka Dabrowska-Boruch and Kazimierz Wiatr", title = "A study of the loops control for reconfigurable computing with {OpenCL} in the {LABS} local search problem", journal = j-IJHPCA, volume = "34", number = "1", pages = "103--114", day = "1", month = jan, year = "2020", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342019868515", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Sat Jul 25 09:38:31 MDT 2020", bibsource = "http://hpc.sagepub.com/; https://www.math.utah.edu/pub/tex/bib/ijsa.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://journals.sagepub.com/doi/full/10.1177/1094342019868515", acknowledgement = ack-nhfb, fjournal = "International Journal of High Performance Computing Applications", } @Article{Salinas:2020:FEI, author = "{\'A}lvaro Salinas and Claudio Torres and Orlando Ayala", title = "A fast and efficient integration of boundary conditions into a unified {CUDA} Kernel for a shallow water solver lattice {Boltzmann} Method", journal = j-COMP-PHYS-COMM, volume = "249", number = "??", pages = "Article 107009", month = apr, year = "2020", CODEN = 
"CPHCBZ", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Mon Mar 2 13:57:36 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2020.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465519303443", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Samfass:2020:LTO, author = "Philipp Samfass and Tobias Weinzierl and Dominic E. Charrier and Michael Bader", title = "Lightweight task offloading exploiting {MPI} wait times for parallel adaptive mesh refinement", journal = j-CCPE, volume = "32", number = "24", pages = "e5916:1--e5916:??", day = "25", month = dec, year = "2020", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.5916", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Wed Mar 31 07:52:23 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, ajournal = "Concurr. 
Comput.", fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "09 July 2020", } @Article{Shekofteh:2020:CEC, author = "S.-Kazem Shekofteh and Hamid Noori and Mahmoud Naghibzadeh and Holger Fr{\"o}ning and Hadi Sadoghi Yazdi", title = "{cCUDA}: Effective Co-Scheduling of Concurrent Kernels on {GPUs}", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "31", number = "4", pages = "766--778", month = apr, year = "2020", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/TPDS.2019.2944602", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Wed Jan 22 06:09:50 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "IEEE Transactions on Parallel and Distributed Systems", journal-URL = "https://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=71", keywords = "Analytical models; Benchmark testing; concurrent kernel execution; Graphics processing units; Hardware; Kernel; resource management; Scheduling; scheduling; stream", } @Article{Shen:2020:GPC, author = "Qi Shen and Craig Sharp and Richard Davison and Gary Ushaw and Rajiv Ranjan and Albert Y. 
Zomaya and Graham Morgan", title = "A general purpose contention manager for software transactions on the {GPU}", journal = j-J-PAR-DIST-COMP, volume = "139", number = "??", pages = "1--17", month = may, year = "2020", CODEN = "JPDCER", DOI = "https://doi.org/10.1016/j.jpdc.2019.12.018", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Wed Mar 18 09:26:12 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0743731519301376", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Article{Silla:2020:IPP, author = "Federico Silla and Javier Prades and Elvira Baydal and Carlos Rea{\~n}o", title = "Improving the performance of physics applications in atom-based clusters with {rCUDA}", journal = j-J-PAR-DIST-COMP, volume = "137", number = "??", pages = "160--178", month = mar, year = "2020", CODEN = "JPDCER", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Wed Mar 18 09:26:11 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0743731519304034", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Article{Skjellum:2020:FSI, author = "Anthony Skjellum and Purushotham V. Bangalore and Ryan E. 
Grant", title = "Foreword to the Special Issue of the {Workshop on Exascale MPI (ExaMPI 2017)}", journal = j-CCPE, volume = "32", number = "3", pages = "e5459:1--e5459:??", day = "10", month = feb, year = "2020", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.5459", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Wed Mar 31 07:52:13 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, ajournal = "Concurr. Comput.", fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "18 July 2019", } @Article{Spiliotis:2020:PII, author = "Iraklis M. Spiliotis and Michael P. Bekakos and Yiannis S. Boutalis", title = "Parallel implementation of the {Image Block Representation} using {OpenMP}", journal = j-J-PAR-DIST-COMP, volume = "137", number = "??", pages = "134--147", month = mar, year = "2020", CODEN = "JPDCER", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Wed Mar 18 09:26:11 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0743731519307622", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Article{Stpiczynski:2020:ALB, author = "Przemys{\l}aw Stpiczy{\'n}ski", title = "Algorithmic and language-based optimization of {Marsa-LFIB4} pseudorandom number generator using {OpenMP}, {OpenACC} and {CUDA}", journal = j-J-PAR-DIST-COMP, volume = "137", number = "??", pages = "238--245", month = mar, year = "2020", CODEN = "JPDCER", DOI = "https://doi.org/10.1016/j.jpdc.2019.12.004", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Wed Mar 18 
09:26:11 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib; https://www.math.utah.edu/pub/tex/bib/prng.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0743731519304885", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Article{Sun:2020:RTS, author = "J. Sun and N. Guan and F. Li and H. Gao and C. Shi and W. Yi", title = "Real-Time Scheduling and Analysis of {OpenMP} {DAG} Tasks Supporting Nested Parallelism", journal = j-IEEE-TRANS-COMPUT, volume = "69", number = "9", pages = "1335--1348", month = sep, year = "2020", CODEN = "ITCOB4", ISSN = "0018-9340 (print), 1557-9956 (electronic)", ISSN-L = "0018-9340", bibdate = "Wed Aug 12 14:58:16 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranscomput2020.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "IEEE Transactions on Computers", journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=12", } @Article{Tiotto:2020:OCO, author = "E. Tiotto and B. Mahjour and W. Tsang and X. Xue and T. Islam and W. 
Chen", title = "{OpenMP 4.5} compiler optimization for {GPU} offloading", journal = j-IBM-JRD, volume = "64", number = "3/4", pages = "14:1--14:11", month = may # "\slash " # jul, year = "2020", CODEN = "IBMJAE", DOI = "https://doi.org/10.1147/JRD.2019.2962428", ISSN = "0018-8646 (print), 2151-8556 (electronic)", ISSN-L = "0018-8646", bibdate = "Wed Jun 3 18:35:26 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/ibmjrd.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/super.bib", acknowledgement = ack-nhfb, fjournal = "IBM Journal of Research and Development", journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5288520", } @Article{Traff:2020:SIS, author = "Jesper Larsson Tr{\"a}ff and Torsten Hoefler", title = "Special issue: Selected papers from {EuroMPI 2019}", journal = j-PARALLEL-COMPUTING, volume = "99", number = "??", pages = "Article 102695", month = nov, year = "2020", CODEN = "PACOEJ", DOI = "https://doi.org/10.1016/j.parco.2020.102695", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Mon Mar 29 11:36:02 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819120300855", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Tsiolakis:2020:NPG, author = "Vasileios Tsiolakis and Matteo Giacomini and Ruben Sevilla and Carsten Othmer and Antonio Huerta", title = "Nonintrusive proper generalised decomposition for parametrised incompressible flow problems in {OpenFOAM}", journal = j-COMP-PHYS-COMM, volume = "249", number = "??", pages = "Article 107013", month = apr, year = "2020", CODEN = "CPHCBZ", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Mon Mar 2 13:57:36 MST 2020", bibsource = 
"https://www.math.utah.edu/pub/tex/bib/compphyscomm2020.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465519303479", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Turchetto:2020:GDS, author = "M. Turchetto and A. D. Pal{\`u} and R. Vacondio", title = "A General Design for a Scalable {MPI-GPU} Multi-Resolution {2D} Numerical Solver", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "31", number = "5", pages = "1036--1047", month = may, year = "2020", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/TPDS.2019.2961909", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Thu Feb 20 10:08:58 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "IEEE Transactions on Parallel and Distributed Systems", journal-URL = "https://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=71", keywords = "AMR; CUDA; dynamic load balancing; hilbert space filling curves; MPI; multi-GPU; multi-resolution grid; shallow water equations (SWE)", } @Article{Valero-Lara:2020:SFA, author = "Pedro Valero-Lara and Sandra Catal{\'a}n and Xavier Martorell and Tetsuzo Usui and Jes{\'u}s Labarta", title = "{sLASs}: a fully automatic auto-tuned linear algebra library based on {OpenMP} extensions implemented in {OmpSs} ({LASs} Library)", journal = j-J-PAR-DIST-COMP, volume = "138", number = "??", pages = "153--171", month = apr, year = "2020", CODEN = "JPDCER", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Wed Mar 18 09:26:11 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0743731519303417", acknowledgement = 
ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Article{Wang:2020:EPE, author = "X. Wang and X. Qian and A. Knoll and K. Huang", title = "Efficient Performance Estimation and Work-Group Size Pruning for {OpenCL} Kernels on {GPUs}", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "31", number = "5", pages = "1089--1106", month = may, year = "2020", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/TPDS.2019.2958343", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Thu Feb 20 10:08:58 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "IEEE Transactions on Parallel and Distributed Systems", journal-URL = "https://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=71", keywords = "Analytical models; Estimation; GPU; Graphics processing units; Hardware; Kernel; Measurement; OpenCL; performance estimation; performance tuning; Runtime; work-group size", } @Article{Weng:2020:CMS, author = "Tien-Hsiung Weng and Kuan-Ching Li and Zhiliu Yang and Chen Liu", title = "On the code modernization of shared sampling alpha matting with {OpenMP}", journal = j-FUT-GEN-COMP-SYS, volume = "107", number = "??", pages = "177--191", month = jun, year = "2020", CODEN = "FGSEVI", DOI = "https://doi.org/10.1016/j.future.2019.12.012", ISSN = "0167-739X (print), 1872-7115 (electronic)", ISSN-L = "0167-739X", bibdate = "Fri Jun 19 07:44:14 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/futgencompsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167739X19314116", acknowledgement = ack-nhfb, fjournal = "Future Generation Computer Systems", journal-URL = "http://www.sciencedirect.com/science/journal/0167739X", } @Article{White:2020:OPP, author = "Sam White and Laxmikant V. 
Kale", title = "Optimizing point-to-point communication between adaptive {MPI} endpoints in shared memory", journal = j-CCPE, volume = "32", number = "3", pages = "e4467:1--e4467:??", day = "10", month = feb, year = "2020", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.4467", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Wed Mar 31 07:52:13 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, ajournal = "Concurr. Comput.", fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "12 March 2018", } @Article{Yu:2020:EPW, author = "C. Yu and S. Tsao", title = "Efficient and Portable Workgroup Size Tuning", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "31", number = "2", pages = "455--469", month = feb, year = "2020", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/TPDS.2019.2937295", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Wed Jan 22 06:09:50 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "IEEE Transactions on Parallel and Distributed Systems", journal-URL = "https://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=71", keywords = "automatic performance tuning; Computational modeling; Graphics processing units; Hardware; Indexes; Kernel; microbenchmarking; OpenCL; Performance evaluation; Tuning; workgroup size selection", } @Article{Zarebavani:2020:CCB, author = "B. Zarebavani and F. Jafarinejad and M. Hashemi and S. 
Salehkaleybar", title = "{cuPC}: {CUDA}-Based Parallel {PC} Algorithm for Causal Structure Learning on {GPU}", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "31", number = "3", pages = "530--542", month = mar, year = "2020", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/TPDS.2019.2939126", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Wed Jan 22 06:09:50 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "IEEE Transactions on Parallel and Distributed Systems", journal-URL = "https://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=71", keywords = "Bayes methods; Bayesian networks; causal discovery; CUDA; GPU; Graphical models; Graphics processing units; machine learning; Markov processes; Parallel algorithms; parallel processing; PC algorithm; Scalability", } @Article{Zhang:2020:CTE, author = "T. Zhang and X. Liu and X. Wang and A. 
Walid", title = "{cuTensor-Tubal}: Efficient Primitives for Tubal-Rank Tensor Learning Operations on {GPUs}", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "31", number = "3", pages = "595--610", month = mar, year = "2020", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/TPDS.2019.2940192", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Wed Jan 22 06:09:50 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "IEEE Transactions on Parallel and Distributed Systems", journal-URL = "https://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=71", keywords = "Computational modeling; Computer architecture; cuTensor-tubal library; Frequency-domain analysis; GPU; Graphics processing units; Libraries; Low-tubal-rank tensor decomposition; Matrix decomposition; t-SVD; tensor completion", } @Article{Zhou:2020:CHM, author = "Huan Zhou and Jos{\'e} Gracia and Naweiluo Zhou and Ralf Schneider", title = "Collectives in hybrid {MPI+MPI} code: Design, practice and performance", journal = j-PARALLEL-COMPUTING, volume = "99", number = "??", pages = "Article 102669", month = nov, year = "2020", CODEN = "PACOEJ", DOI = "https://doi.org/10.1016/j.parco.2020.102669", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Mon Mar 29 11:36:02 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819120300612", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Zhou:2020:EOP, author = "Hongyang Zhou and G{\'a}bor T{\'o}th", title = "Efficient {OpenMP} parallelization to a complex {MPI} parallel magnetohydrodynamics code", journal = j-J-PAR-DIST-COMP, volume = "139", number = "??", 
pages = "65--74", month = may, year = "2020", CODEN = "JPDCER", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Wed Mar 18 09:26:12 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0743731519304903", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Article{Aldinucci:2021:PPS, author = "Marco Aldinucci and Valentina Cesare and Iacopo Colonnelli and Alberto Riccardo Martinelli and Gianluca Mittone and Barbara Cantalupo and Carlo Cavazzoni and Maurizio Drocco", title = "Practical parallelization of scientific applications with {OpenMP}, {OpenACC} and {MPI}", journal = j-J-PAR-DIST-COMP, volume = "157", number = "??", pages = "13--29", month = nov, year = "2021", CODEN = "JPDCER", DOI = "https://doi.org/10.1016/j.jpdc.2021.05.017", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Thu Feb 10 06:39:21 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0743731521001295", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Article{Andoh:2021:AMM, author = "Yoshimichi Andoh and Shin-ichi Ichikawa and Tatsuya Sakashita and Noriyuki Yoshii and Susumu Okazaki", title = "Algorithm to minimize {MPI} communications in the parallelized fast multipole method combined with molecular dynamics calculations", journal = j-J-COMPUT-CHEM, volume = "42", number = "15", pages = "1073--1087", day = "5", month = jun, year = "2021", CODEN = "JCCHDD", DOI = "https://doi.org/10.1002/jcc.26524", ISSN = "0192-8651 (print), 1096-987X 
(electronic)", ISSN-L = "0192-8651", bibdate = "Mon May 17 16:26:14 MDT 2021", bibsource = "https://www.math.utah.edu/pub/bibnet/subjects/fastmultipole.bib; https://www.math.utah.edu/pub/tex/bib/jcomputchem2020.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, ajournal = "J. Comput. Chem.", fjournal = "Journal of Computational Chemistry", journal-URL = "http://www.interscience.wiley.com/jpages/0192-8651", onlinedate = "29 March 2021", } @Article{Antonelli:2021:CBI, author = "L. Antonelli and E. Francomano and F. Gregoretti", title = "A {CUDA}-based implementation of an improved {SPH} method on {GPU}", journal = j-APPL-MATH-COMP, volume = "409", number = "??", pages = "Article 125482", day = "15", month = nov, year = "2021", CODEN = "AMHCBQ", DOI = "https://doi.org/10.1016/j.amc.2020.125482", ISSN = "0096-3003 (print), 1873-5649 (electronic)", ISSN-L = "0096-3003", bibdate = "Mon Jan 31 07:58:57 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/applmathcomput2020.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0096300320304410", acknowledgement = ack-nhfb, fjournal = "Applied Mathematics and Computation", journal-URL = "http://www.sciencedirect.com/science/journal/00963003", } @Article{Betcke:2021:DHP, author = "Timo Betcke and Matthew W. 
Scroggs", title = "Designing a High-Performance Boundary Element Library With {OpenCL} and {Numba}", journal = j-COMPUT-SCI-ENG, volume = "23", number = "4", pages = "18--28", month = jul # "\slash " # aug, year = "2021", CODEN = "CSENFA", DOI = "https://doi.org/10.1109/MCSE.2021.3085420", ISSN = "1521-9615 (print), 1558-366X (electronic)", ISSN-L = "1521-9615", bibdate = "Thu Jul 29 07:00:57 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/computscieng.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Computing in Science and Engineering", journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5992", } @Article{Cesarini:2021:CRT, author = "D. Cesarini and A. Bartolini and P. Bonf{\`a} and C. Cavazzoni and L. Benini", title = "{COUNTDOWN}: a Run-Time Library for Performance-Neutral Energy Saving in {MPI} Applications", journal = j-IEEE-TRANS-COMPUT, volume = "70", number = "5", pages = "682--695", month = may, year = "2021", CODEN = "ITCOB4", DOI = "https://doi.org/10.1109/TC.2020.2995269", ISSN = "0018-9340 (print), 1557-9956 (electronic)", ISSN-L = "0018-9340", bibdate = "Thu Apr 8 06:29:24 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranscomput2020.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "IEEE Transactions on Computers", journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=12", } @Article{Chapp:2021:IDS, author = "Dylan Chapp and Nigel Tan and Sanjukta Bhowmick and Michela Taufer", title = "Identifying Degree and Sources of Non-Determinism in {MPI} Applications Via Graph Kernels", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "32", number = "12", pages = "2936--2952", month = dec, year = "2021", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/TPDS.2021.3081530", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Fri Jun 4 09:55:50 2021", bibsource = 
"https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "IEEE Transactions on Parallel and Distributed Systems", journal-URL = "https://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=71", } @Article{Chen:2021:CCR, author = "Genlang Chen and Jiajian Zhang and Chaoyi Pang", title = "{CRState}: checkpoint/restart of {OpenCL} program for in-kernel applications", journal = j-J-SUPERCOMPUTING, volume = "77", number = "6", pages = "5426--5467", month = jun, year = "2021", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-020-03460-2", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Fri May 14 09:20:01 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://link.springer.com/article/10.1007/s11227-020-03460-2", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", online-date = "Published: 06 November 2020 Pages: 5426 - 5467", } @Article{Dalcin:2021:MSU, author = "Lisandro Dalcin and Yao-Lung L. Fang", title = "{mpi4py}: Status Update After 12 Years of Development", journal = j-COMPUT-SCI-ENG, volume = "23", number = "4", pages = "47--54", month = jul # "\slash " # aug, year = "2021", CODEN = "CSENFA", DOI = "https://doi.org/10.1109/MCSE.2021.3083216", ISSN = "1521-9615 (print), 1558-366X (electronic)", ISSN-L = "1521-9615", bibdate = "Thu Jul 29 07:00:57 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/computscieng.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/python.bib", acknowledgement = ack-nhfb, fjournal = "Computing in Science and Engineering", journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5992", } @Article{Dosanjh:2021:IEM, author = "Matthew G. F. 
Dosanjh and Andrew Worley and Derek Schafer and Prema Soundararajan and Sheikh Ghafoor and Anthony Skjellum and Purushotham V. Bangalore and Ryan E. Grant", title = "Implementation and evaluation of {MPI 4.0} partitioned communication libraries", journal = j-PARALLEL-COMPUTING, volume = "108", number = "??", pages = "??--??", month = dec, year = "2021", CODEN = "PACOEJ", DOI = "https://doi.org/10.1016/j.parco.2021.102827", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Fri Feb 18 10:07:17 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819121000752", acknowledgement = ack-nhfb, articleno = "102827", fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Ferreira:2021:EMR, author = "Kurt B. Ferreira and Scott Levy", title = "Evaluating {MPI} resource usage summary statistics", journal = j-PARALLEL-COMPUTING, volume = "108", number = "??", pages = "??--??", month = dec, year = "2021", CODEN = "PACOEJ", DOI = "https://doi.org/10.1016/j.parco.2021.102825", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Fri Feb 18 10:07:17 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819121000739", acknowledgement = ack-nhfb, articleno = "102825", fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Gong:2021:TDG, author = "Dunwei Gong and Baicai Sun and Xiangjuan Yao and Tian Tian", title = "Test Data Generation for Path Coverage of {MPI} Programs Using {SAEO}", journal = j-TOSEM, volume = "30", number = "2", pages = "17:1--17:37", month = mar, year = "2021", CODEN = "ATSMER", DOI = 
"https://doi.org/10.1145/3423132", ISSN = "1049-331X (print), 1557-7392 (electronic)", ISSN-L = "1049-331X", bibdate = "Thu Mar 18 06:18:01 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/tosem.bib", URL = "https://dl.acm.org/doi/10.1145/3423132", abstract = "Message-passing interface (MPI) programs, a typical kind of parallel programs, have been commonly used in various applications. However, it generally takes exhaustive computation to run these programs when generating test data to test them. In this \ldots{}", acknowledgement = ack-nhfb, articleno = "17", fjournal = "ACM Transactions on Software Engineering and Methodology", journal-URL = "https://dl.acm.org/loi/tosem", } @Article{Hahne:2021:APP, author = "Jens Hahne and Stephanie Friedhoff and Matthias Bolten", title = "{Algorithm 1016}: {PyMGRIT}: a {Python} Package for the Parallel-in-time Method {MGRIT}", journal = j-TOMS, volume = "47", number = "2", pages = "19:1--19:22", month = apr, year = "2021", CODEN = "ACMSCU", DOI = "https://doi.org/10.1145/3446979", ISSN = "0098-3500 (print), 1557-7295 (electronic)", ISSN-L = "0098-3500", bibdate = "Tue Apr 27 08:23:28 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/python.bib; https://www.math.utah.edu/pub/tex/bib/toms.bib", URL = "https://dl.acm.org/doi/10.1145/3446979", abstract = "In this article, we introduce the Python framework PyMGRIT, which implements the multigrid-reduction-in-time (MGRIT) algorithm for solving (non-)linear systems arising from the discretization of time-dependent problems. The MGRIT algorithm is a reduction-based iterative method that allows parallel-in-time simulations, i.e., calculating multiple time steps simultaneously in a simulation, using a time-grid hierarchy. 
The PyMGRIT framework includes many different variants of the MGRIT algorithm, ranging from different multigrid cycle types and relaxation schemes, various coarsening strategies, including time-only and space-time coarsening, and the ability to utilize different time integrators on different levels in the multigrid hierarchy. The comprehensive documentation with tutorials and many examples and the fully documented code allow an easy start into the work with the package. The functionality of the code is ensured by automated serial and parallel tests using continuous integration. PyMGRIT supports serial runs suitable for prototyping and testing of new approaches, as well as parallel runs using the Message Passing Interface (MPI). In this manuscript, we describe the implementation of the MGRIT algorithm in PyMGRIT and present the usage from both a user and a developer point of view. Three examples illustrate different aspects of the package itself, especially running tests with pure time parallelism, as well as space-time parallelism through the coupling of PyMGRIT with PETSc or Firedrake.", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM Transactions on Mathematical Software (TOMS)", journal-URL = "https://dl.acm.org/loi/toms", } @Article{Halbiniak:2021:EOH, author = "Kamil Halbiniak and Lukasz Szustak and Tomasz Olas and Roman Wyrzykowski and Pawel Gepner", title = "Exploration of {OpenCL} Heterogeneous Programming for Porting Solidification Modeling to {CPU-GPU} Platforms", journal = j-CCPE, volume = "33", number = "4", pages = "e6011:1--e6011:??", day = "25", month = feb, year = "2021", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.6011", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Tue May 18 08:31:21 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, ajournal = "Concurrency Computat., Pract. Exper.", fjournal = "Concurrency and Computation: 
Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "09 October 2020", } @Article{Ho:2021:GFD, author = "Nhut-Minh Ho and Himeshi {De Silva} and Weng-Fai Wong", title = "{GRAM}: a Framework for Dynamically Mixing Precisions in {GPU} Applications", journal = j-TACO, volume = "18", number = "2", pages = "19:1--19:24", month = mar, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3441830", ISSN = "1544-3566 (print), 1544-3973 (electronic)", ISSN-L = "1544-3566", bibdate = "Sat Mar 20 17:25:10 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/taco.bib", URL = "https://dl.acm.org/doi/10.1145/3441830", abstract = "This article presents GRAM (GPU-based Runtime Adaption for Mixed-precision) a framework for the effective use of mixed precision arithmetic for CUDA programs. Our method provides a fine-grain tradeoff between output error and performance. It can create many variants that satisfy different accuracy requirements by assigning different groups of threads to different precision levels adaptively at runtime. To widen the range of applications that can benefit from its approximation, GRAM comes with an optional half-precision approximate math library. 
Using GRAM, we can trade off precision for any performance improvement of up to 540\%, depending on the application and accuracy requirement.", acknowledgement = ack-nhfb, articleno = "19", fjournal = "ACM Transactions on Architecture and Code Optimization (TACO)", journal-URL = "https://dl.acm.org/loi/taco", } @Article{Hori:2021:ISM, author = "Atsushi Hori and Emmanuel Jeannot and George Bosilca and Takahiro Ogura and Balazs Gerofi and Jie Yin and Yutaka Ishikawa", title = "An international survey on {MPI} users", journal = j-PARALLEL-COMPUTING, volume = "108", number = "??", pages = "??--??", month = dec, year = "2021", CODEN = "PACOEJ", DOI = "https://doi.org/10.1016/j.parco.2021.102853", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Fri Feb 18 10:07:17 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819121000983", acknowledgement = ack-nhfb, articleno = "102853", fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Huang:2021:LBI, author = "Liang-Tsung Huang and Kai-Cheng Wei and Jian-An Wang", title = "A lightweight {BLASTP} and its implementation on {CUDA GPUs}", journal = j-J-SUPERCOMPUTING, volume = "77", number = "1", pages = "322--342", month = jan, year = "2021", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-020-03267-1", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Fri May 14 09:19:58 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://link.springer.com/article/10.1007/s11227-020-03267-1", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", online-date = "Published: 07 April 2020 Pages: 322 - 342", } 
@Article{Jalowiecki:2021:BFS, author = "Konrad Ja{\l}owiecki and Marek M. Rams and Bart{\l}omiej Gardas", title = "Brute-forcing spin-glass problems with {CUDA}", journal = j-COMP-PHYS-COMM, volume = "260", number = "??", pages = "Article 107728", month = mar, year = "2021", CODEN = "CPHCBZ", DOI = "https://doi.org/10.1016/j.cpc.2020.107728", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Sat Mar 13 08:21:41 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2020.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S001046552030360X", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Keppens:2021:MAP, author = "Rony Keppens and Jannis Teunissen and Chun Xia and Oliver Porth", title = "{MPI-AMRVAC}: a parallel, grid-adaptive {PDE} toolkit", journal = j-COMPUT-MATH-APPL, volume = "81", number = "??", pages = "316--333", day = "1", month = jan, year = "2021", CODEN = "CMAPDK", DOI = "https://doi.org/10.1016/j.camwa.2020.03.023", ISSN = "0898-1221 (print), 1873-7668 (electronic)", ISSN-L = "0898-1221", bibdate = "Sat Mar 13 10:03:14 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/computmathappl2020.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0898122120301279", acknowledgement = ack-nhfb, fjournal = "Computers and Mathematics with Applications", journal-URL = "http://www.sciencedirect.com/science/journal/08981221", } @Article{Kim:2021:GRP, author = "Mingyu Kim and Nakhoon Baek", title = "A {3D} graphics rendering pipeline implementation based on the {openCL} massively parallel processing", journal = j-J-SUPERCOMPUTING, volume = "77", number = "7", pages = "7351--7367", month = jul, year = "2021", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-020-03581-8", ISSN = "0920-8542 (print), 
1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Mon Feb 28 16:44:32 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://link.springer.com/article/10.1007/s11227-020-03581-8", acknowledgement = ack-nhfb, ajournal = "J. Supercomputing", fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Article{Klemm:2021:OAH, author = "Michael Klemm and Eduardo Qui{\~n}ones and Tucker Taft and Dirk Ziegenbein and Sara Royuela", title = "The {OpenMP API} for High Integrity Systems: Moving Responsibility from Users to Vendors", journal = j-SIGADA-LETTERS, volume = "40", number = "2", pages = "48--50", month = apr, year = "2021", CODEN = "AALEE5", DOI = "https://doi.org/10.1145/3463478.3463480", ISSN = "1094-3641 (print), 1557-9476 (electronic)", ISSN-L = "0736-721X", bibdate = "Mon Jun 28 15:50:16 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigada.bib", URL = "https://dl.acm.org/doi/10.1145/3463478.3463480", abstract = "OpenMP is traditionally focused on boosting performance in HPC systems. However, other domains are showing an increasing interest in the use of OpenMP by virtue of key aspects introduced in recent versions of the specification: the tasking model, the accelerator model, and other features like the requires and the assumes directives, which allow defining certain contracts. One example is the safety-critical embedded domain, where several efforts have been initiated towards the adoption of OpenMP. However, the OpenMP specification states that ``application developers are responsible for correctly using the OpenMP API to produce a conforming program'', being not acceptable in high integrity systems, where aspects such as reliability and resiliency have to be ensured at different levels of criticality. 
In this scope, programming languages like Ada propose a different paradigm by exposing fewer features to the user, and leaving the responsibility of safely exploiting the full underlying architecture to the compiler and the runtime systems, instead. The philosophy behind this kind of model is to move the responsibility of producing correct parallel programs from users to vendors. In this panel, actors from different domains involved in the use of parallel programming models for the development of high-integrity systems share their thoughts about this topic.", acknowledgement = ack-nhfb, fjournal = "ACM SIGADA Ada Letters", journal-URL = "http://portal.acm.org/citation.cfm?id=J32", } @Article{Kohnke:2021:CFM, author = "Bartosz Kohnke and Carsten Kutzner and Andreas Beckmann and Gert Lube and Ivo Kabadshow and Holger Dachsel and Helmut Grubm{\"u}ller", title = "A {CUDA} fast multipole method with highly efficient {M2L} far field evaluation", journal = j-IJHPCA, volume = "35", number = "1", pages = "97--117", day = "1", month = jan, year = "2021", CODEN = "IHPCFL", DOI = "https://doi.org/10.1177/1094342020964857", ISSN = "1094-3420 (print), 1741-2846 (electronic)", ISSN-L = "1094-3420", bibdate = "Tue May 18 15:46:08 MDT 2021", bibsource = "http://hpc.sagepub.com/; https://www.math.utah.edu/pub/bibnet/subjects/fastmultipole.bib; https://www.math.utah.edu/pub/tex/bib/ijsa.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://journals.sagepub.com/doi/full/10.1177/1094342020964857", acknowledgement = ack-nhfb, ajournal = "Int. J. High Perform. Comput. Appl.", fjournal = "International Journal of High Performance Computing Applications", journal-URL = "https://journals.sagepub.com/home/hpc", } @Article{Lambert:2021:OOFa, author = "Jacob Lambert and Seyong Lee and Jeffrey S. Vetter and Allen D. 
Malony", title = "Optimization with the {OpenACC-to-FPGA} framework on the {Arria 10} and {Stratix 10} {FPGAs}", journal = j-PARALLEL-COMPUTING, volume = "104--105", number = "??", pages = "??--??", month = jul, year = "2021", CODEN = "PACOEJ", DOI = "https://doi.org/10.1016/j.parco.2021.102784", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Fri Feb 18 10:07:16 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819121000417", acknowledgement = ack-nhfb, articleno = "102784", fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Lambert:2021:OOFb, author = "Jacob Lambert and Seyong Lee and Jeffrey S. Vetter and Allen D. Malony", title = "Optimization with the {OpenACC}-to-{FPGA} framework on the Arria 10 and Stratix 10 {FPGAs}", journal = j-PARALLEL-COMPUTING, volume = "104--105", number = "??", pages = "??--??", month = jul, year = "2021", CODEN = "PACOEJ", DOI = "https://doi.org/10.1016/j.parco.2021.102784", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Fri Feb 18 10:07:16 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819121000417", acknowledgement = ack-nhfb, articleno = "102784", fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Liu:2021:BMN, author = "Feilong Liu and Claude Barthels and Spyros Blanas and Hideaki Kimura and Garret Swart", title = "Beyond {MPI}: New Communication Interfaces for Database Systems and Data-Intensive Applications", journal = j-SIGMOD, volume = "49", number = "4", pages = "12--17", month = mar, year = "2021", CODEN = "SRECD8", DOI = 
"https://doi.org/10.1145/3456859.3456862", ISSN = "0163-5808 (print), 1943-5835 (electronic)", ISSN-L = "0163-5808", bibdate = "Thu Mar 11 06:12:21 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigmod.bib", URL = "https://dl.acm.org/doi/10.1145/3456859.3456862", abstract = "Networks with Remote Direct Memory Access (RDMA) support are becoming increasingly common. RDMA, however, offers a limited programming interface to remote memory that consists of read, write and atomic operations. With RDMA alone, completing the most basic \ldots{}", acknowledgement = ack-nhfb, fjournal = "SIGMOD Record (ACM Special Interest Group on Management of Data)", journal-URL = "https://dl.acm.org/loi/sigmod", } @Article{Lyu:2021:FFA, author = "Xing-long Lyu and Tiexiang Li and Tsung-ming Huang and Jia-wei Lin and Wen-wei Lin and Sheng Wang", title = "{FAME}: Fast Algorithms for {Maxwell}'s Equations for Three-dimensional Photonic Crystals", journal = j-TOMS, volume = "47", number = "3", pages = "26:1--26:24", month = jun, year = "2021", CODEN = "ACMSCU", DOI = "https://doi.org/10.1145/3446329", ISSN = "0098-3500 (print), 1557-7295 (electronic)", ISSN-L = "0098-3500", bibdate = "Sun Jun 27 07:42:02 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/toms.bib", URL = "https://dl.acm.org/doi/10.1145/3446329", abstract = "In this article, we propose the Fast Algorithms for Maxwell's Equations (FAME) package for solving Maxwell's equations for modeling three-dimensional photonic crystals. FAME combines the null-space free method with fast Fourier transform (FFT)-based matrix-vector multiplications to solve the generalized eigenvalue problems (GEPs) arising from Yee's discretization. The GEPs are transformed into a null-space free standard eigenvalue problem with a Hermitian positive-definite coefficient matrix. 
The computation times for FFT-based matrix-vector multiplications with matrices of dimension 7 million are only $ 0.33 $ and $ 3.6 \times 10^{-3} $ seconds using MATLAB with an Intel Xeon CPU and CUDA C++ programming with a single NVIDIA Tesla P100 GPU, respectively. Such multiplications significantly reduce the computational costs of the conjugate gradient method for solving linear systems. We successfully use FAME on a single P100 GPU to solve a set of GEPs with matrices of dimension more than 19 million, in 127 to 191 seconds per problem. These results demonstrate the potential of our proposed package to enable large-scale numerical simulations for novel physical discoveries and engineering applications of photonic crystals.", acknowledgement = ack-nhfb, articleno = "26", fjournal = "ACM Transactions on Mathematical Software (TOMS)", journal-URL = "https://dl.acm.org/loi/toms", } @Article{Ma:2021:CSB, author = "Wenpeng Ma and Wu Yuan and Xiazhen Liu", title = "A Comparative Study of Block Incomplete Sparse Approximate Inverses Preconditioning on {Tesla K20} and {V100} {GPUs}", journal = j-ALGORITHMS-BASEL, volume = "14", number = "7", month = jul, year = "2021", CODEN = "ALGOCH", DOI = "https://doi.org/10.3390/a14070204", ISSN = "1999-4893 (electronic)", ISSN-L = "1999-4893", bibdate = "Fri Jul 23 15:05:28 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/algorithms.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://www.mdpi.com/1999-4893/14/7/204", acknowledgement = ack-nhfb, articleno = "204", fjournal = "Algorithms (Basel)", journal-URL = "https://www.mdpi.com/journal/algorithms", pagecount = "??", } @Article{Margolin:2021:TBF, author = "Alexander Margolin and Amnon Barak", title = "Tree-based fault-tolerant collective operations for {MPI}", journal = j-CCPE, volume = "33", number = "14", pages = "e5826:1--e5826:??", day = "25", month = jul, year = "2021", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.5826", ISSN = 
"1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Tue Feb 22 09:49:55 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, ajournal = "Concurrency Computat., Pract. Exper.", fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "15 June 2020", } @Article{Martinez-Noriega:2021:COE, author = "Edgar Josafat Martinez-Noriega and Syunji Yazaki and Tetsu Narumi", title = "{CUDA} offloading for energy-efficient and high-frame-rate simulations using tablets", journal = j-CCPE, volume = "33", number = "2", pages = "e5488:1--e5488:??", day = "25", month = jan, year = "2021", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.5488", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Tue May 18 08:31:19 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, ajournal = "Concurrency Computat., Pract. Exper.", fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "23 August 2019", } @Article{Matwiejew:2021:QFP, author = "Edric Matwiejew and Jingbo Wang", title = "{QSW\_MPI}: a framework for parallel simulation of quantum stochastic walks", journal = j-COMP-PHYS-COMM, volume = "260", number = "??", pages = "Article 107724", month = mar, year = "2021", CODEN = "CPHCBZ", DOI = "https://doi.org/10.1016/j.cpc.2020.107724", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Sat Mar 13 08:21:41 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2020.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465520303581", acknowledgement = ack-nhfb, fjournal = "Computer Physics 
Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Meyer:2021:IBH, author = "Bruno Henrique Meyer and Aurora Trinidad Ramirez Pozo and Wagner M. Nunan Zola", title = "Improving {Barnes--Hut} {$t$-SNE} Algorithm in Modern {GPU} Architectures with Random Forest {KNN} and Simulated Wide-Warp", journal = j-JETC, volume = "17", number = "4", pages = "53:1--53:26", month = oct, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3447779", ISSN = "1550-4832", ISSN-L = "1550-4832", bibdate = "Tue Sep 14 06:51:04 MDT 2021", bibsource = "https://www.math.utah.edu/pub/bibnet/subjects/fastmultipole.bib; https://www.math.utah.edu/pub/tex/bib/jetc.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://dl.acm.org/doi/10.1145/3447779", abstract = "The $t$-Distributed Stochastic Neighbor Embedding (t-SNE) is a widely used technique for dimensionality reduction but is limited by its scalability when applied to large datasets. Recently, BH-tSNE was proposed; this is a successful approximation that transforms a step of the original algorithm into an N-Body simulation problem that can be solved by a modified Barnes-Hut algorithm. However, this improvement still has limitations to process large data volumes (millions of records). Late studies, such as $t$-SNE-CUDA, have used GPUs to implement highly parallel BH-tSNE. In this research we have developed a new GPU BH-tSNE implementation that produces the embedding of multidimensional data points into three-dimensional space. We examine scalability issues in two of the most expensive steps of GPU BH-tSNE by using efficient memory access strategies, recent acceleration techniques, and a new approach to compute the KNN graph structure used in BH-tSNE with GPU. Our design allows up to 460\% faster execution when compared to the $t$-SNE-CUDA implementation. 
Although our SIMD acceleration techniques were used in a modern GPU setup, we have also verified a potential for applications in the context of multi-core processors.", acknowledgement = ack-nhfb, articleno = "53", fjournal = "ACM Journal on Emerging Technologies in Computing Systems (JETC)", journal-URL = "https://dl.acm.org/loi/jetc", } @Article{Muller:2021:MAE, author = "Stefan K. Muller and Jan Hoffmann", title = "Modeling and analyzing evaluation cost of {CUDA} kernels", journal = j-PACMPL, volume = "5", number = "POPL", pages = "25:1--25:31", month = jan, year = "2021", DOI = "https://doi.org/10.1145/3434306", bibdate = "Tue Mar 30 08:10:58 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/pacmpl.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://dl.acm.org/doi/10.1145/3434306", abstract = "General-purpose programming on GPUs (GPGPU) is becoming increasingly in vogue as applications such as machine learning and scientific computing demand high throughput in vector-parallel applications. 
NVIDIA's CUDA toolkit seeks to make GPGPU programming \ldots{}", acknowledgement = ack-nhfb, articleno = "25", fjournal = "Proceedings of the ACM on Programming Languages", journal-URL = "https://pacmpl.acm.org/", } @Article{Munch:2021:HDE, author = "Peter Munch and Katharina Kormann and Martin Kronbichler", title = "\pkg{hyper.deal}: an Efficient, Matrix-free Finite-element Library for High-dimensional Partial Differential Equations", journal = j-TOMS, volume = "47", number = "4", pages = "33:1--33:34", month = dec, year = "2021", CODEN = "ACMSCU", DOI = "https://doi.org/10.1145/3469720", ISSN = "0098-3500 (print), 1557-7295 (electronic)", ISSN-L = "0098-3500", bibdate = "Wed Sep 29 06:58:41 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/toms.bib", URL = "https://dl.acm.org/doi/10.1145/3469720", abstract = "This work presents the efficient, matrix-free finite-element library hyper.deal for solving partial differential equations in two up to six dimensions with high-order discontinuous Galerkin methods. It builds upon the low-dimensional finite-element library deal.II to create complex low-dimensional meshes and to operate on them individually. These meshes are combined via a tensor product on the fly, and the library provides new special-purpose highly optimized matrix-free functions exploiting domain decomposition as well as shared memory via MPI-3.0 features. Both node-level performance analyses and strong/weak-scaling studies on up to 147,456 CPU cores confirm the efficiency of the implementation. 
Results obtained with the library hyper.deal are reported for high-dimensional advection problems and for the solution of the Vlasov--Poisson equation in up to six-dimensional phase space.", acknowledgement = ack-nhfb, articleno = "33", fjournal = "ACM Transactions on Mathematical Software (TOMS)", journal-URL = "https://dl.acm.org/loi/toms", } @Article{Muruganandam:2021:OSR, author = "Paulsamy Muruganandam and Antun Balaz and Sadhan K. Adhikari", title = "\pkg{OpenMP} solver for rotating spin-1 spin-orbit- and {Rabi}-coupled {Bose--Einstein} condensates", journal = j-COMP-PHYS-COMM, volume = "264", number = "??", pages = "Article 107926", month = jul, year = "2021", CODEN = "CPHCBZ", DOI = "https://doi.org/10.1016/j.cpc.2021.107926", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Wed Jun 9 09:57:27 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2020.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465521000618", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Nguyen:2021:EMA, author = "Truong Thao Nguyen and Mohamed Wahib and Ryousei Takano", title = "Efficient {MPI-AllReduce} for large-scale deep learning on {GPU-clusters}", journal = j-CCPE, volume = "33", number = "12", pages = "e5574:1--e5574:??", day = "25", month = jun, year = "2021", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.5574", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Tue Feb 22 09:49:53 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, ajournal = "Concurrency Computat., Pract. 
Exper.", fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "09 December 2019", } @Article{Perepu:2021:OIP, author = "Pavan Kumar Perepu", title = "{OpenMP} Implementation of Parallel Longest Common Subsequence Algorithm for Mathematical Expression Retrieval", journal = j-PARALLEL-PROCESS-LETT, volume = "31", number = "02", pages = "??--??", month = jun, year = "2021", CODEN = "PPLTEE", DOI = "https://doi.org/10.1142/S0129626421500079", ISSN = "0129-6264 (print), 1793-642X (electronic)", ISSN-L = "0129-6264", bibdate = "Thu Feb 17 06:50:36 MST 2022", bibsource = "http://ejournals.wspc.com.sg/ppl/; https://www.math.utah.edu/pub/tex/bib/parallelprocesslett.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://www.worldscientific.com/doi/10.1142/S0129626421500079", abstract = "Given a mathematical expression in LaTeX or MathML format, retrieval algorithm extracts similar expressions from a database. In our previous work, we have used Longest Common Subsequence (LCS) algorithm to match two expressions of lengths, $m$ and $n$, which takes $ O (m n) $ time complexity. If there are $T$ database expressions, total complexity is $ O (T m n) $, and an increase in $T$ also increases this complexity. In the present work, we propose to use parallel LCS algorithm in our retrieval process. Parallel LCS has $ O (\max (m, n)) $ time complexity with $ \max (m, n) $ processors and total complexity can be reduced to $ O (T \max (m, n)) $. For our experimentation, OpenMP based implementation has been used on Intel i3 processor with 4 cores. However, for smaller expressions, parallel version takes more time as the implementation overhead dominates the algorithmic improvement. As such, we have proposed to use parallel version, selectively, only on larger expressions, in our retrieval algorithm to achieve better performance. 
We have compared the sequential and parallel versions of our ME retrieval algorithm, and the performance results have been reported on a database of 829 mathematical expressions.", acknowledgement = ack-nhfb, articleno = "2150007", fjournal = "Parallel Processing Letters", journal-URL = "http://www.worldscientific.com/loi/ppl", } @Article{Pimentel-Garcia:2021:EIP, author = "Ernesto Pimentel-Garc{\'\i}a and Carlos Par{\'e}s and Manuel J. Castro and Julian Koellermeier", title = "On the efficient implementation of {PVM} methods and simple {Riemann} solvers. {Application} to the {Roe} method for large hyperbolic systems", journal = j-APPL-MATH-COMP, volume = "388", number = "??", pages = "Article 125544", day = "1", month = jan, year = "2021", CODEN = "AMHCBQ", DOI = "https://doi.org/10.1016/j.amc.2020.125544", ISSN = "0096-3003 (print), 1873-5649 (electronic)", ISSN-L = "0096-3003", bibdate = "Sat Mar 13 06:39:48 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/applmathcomput2020.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0096300320305002", acknowledgement = ack-nhfb, fjournal = "Applied Mathematics and Computation", journal-URL = "http://www.sciencedirect.com/science/journal/00963003", } @Article{Pinho:2021:RTI, author = "Luis Miguel Pinho and Sara Royuela and Eduardo Qui{\~n}ones", title = "Real-time Issues in the {Ada} Parallel Model with {OpenMP}", journal = j-SIGADA-LETTERS, volume = "40", number = "2", pages = "96--102", month = apr, year = "2021", CODEN = "AALEE5", DOI = "https://doi.org/10.1145/3463478.3463491", ISSN = "1094-3641 (print), 1557-9476 (electronic)", ISSN-L = "0736-721X", bibdate = "Mon Jun 28 15:50:16 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigada.bib", URL = "https://dl.acm.org/doi/10.1145/3463478.3463491", abstract = "The current proposal for the next revision of the Ada language considers the 
possibility to map the language parallel features to an underlying OpenMP runtime. As previously presented, and discussed in previous workshops, the works on fine-grain parallelism in Ada map well to the OpenMP tasking model for parallelism. Nevertheless, and although the general model of integration, and the semantic constructs are already reflected in the proposed revision of the standard, the integration of these new features with the Real-Time Systems Annex of Ada is still not complete. This paper presents an overview of what is supported and the still open issues.", acknowledgement = ack-nhfb, fjournal = "ACM SIGADA Ada Letters", journal-URL = "http://portal.acm.org/citation.cfm?id=J32", } @Article{Proficz:2021:AGA, author = "Jerzy Proficz", title = "All-gather Algorithms Resilient to Imbalanced Process Arrival Patterns", journal = j-TACO, volume = "18", number = "4", pages = "41:1--41:22", month = dec, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3460122", ISSN = "1544-3566 (print), 1544-3973 (electronic)", ISSN-L = "1544-3566", bibdate = "Mon Oct 4 07:14:07 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/taco.bib", URL = "https://dl.acm.org/doi/10.1145/3460122", abstract = "Two novel algorithms for the all-gather operation resilient to imbalanced process arrival patterns (PATs) are presented. The first one, Background Disseminated Ring (BDR), is based on the regular parallel ring algorithm often supplied in MPI implementations and exploits an auxiliary background thread for early data exchange from faster processes to accelerate the performed all-gather operation. The other algorithm, Background Sorted Linear synchronized tree with Broadcast (BSLB), is built upon the already existing PAP-aware gather algorithm, that is, Background Sorted Linear Synchronized tree (BSLS), followed by a regular broadcast distributing gathered data to all participating processes. 
The background of the imbalanced PAP subject is described, along with the PAP monitoring and evaluation topics. An experimental evaluation of the algorithms based on a proposed mini-benchmark is presented. The mini-benchmark was performed over 2,000 times in a typical HPC cluster architecture with homogeneous compute nodes. The obtained results are analyzed according to different PATs, data sizes, and process numbers, showing that the proposed optimization works well for various configurations, is scalable, and can significantly reduce the all-gather elapsed times, in our case, up to factor 1.9 or 47\% in comparison with the best state-of-the-art solution.", acknowledgement = ack-nhfb, articleno = "41", fjournal = "ACM Transactions on Architecture and Code Optimization (TACO)", journal-URL = "https://dl.acm.org/loi/taco", } @Article{Quaranta:2021:NMH, author = "Lionel Quaranta and Lalith Maddegedara", title = "A novel {MPI+MPI} hybrid approach combining {MPI-3} shared memory windows and {C11\slash C++11} memory model", journal = j-J-PAR-DIST-COMP, volume = "157", number = "??", pages = "125--144", month = nov, year = "2021", CODEN = "JPDCER", DOI = "https://doi.org/10.1016/j.jpdc.2021.06.008", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Thu Feb 10 06:39:21 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S074373152100143X", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Article{Ramachandran:2021:PPB, author = "Prabhu Ramachandran and Aditya Bhosale and Kunal Puri and Pawan Negi and Abhinav Muta and A. Dinesh and Dileep Menon and Rahul Govind and Suraj Sanka and Amal S. 
Sebastian and Ananyo Sen and Rohan Kaushik and Anshuman Kumar and Vikas Kurapati and Mrinalgouda Patil and Deep Tavker and Pankaj Pandey and Chandrashekhar Kaushik and Arkopal Dutt and Arpit Agarwal", title = "{PySPH}: a {Python}-based Framework for Smoothed Particle Hydrodynamics", journal = j-TOMS, volume = "47", number = "4", pages = "34:1--34:38", month = dec, year = "2021", CODEN = "ACMSCU", DOI = "https://doi.org/10.1145/3460773", ISSN = "0098-3500 (print), 1557-7295 (electronic)", ISSN-L = "0098-3500", bibdate = "Wed Sep 29 06:58:41 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/python.bib; https://www.math.utah.edu/pub/tex/bib/toms.bib", URL = "https://dl.acm.org/doi/10.1145/3460773", abstract = "PySPH is an open-source, Python-based, framework for particle methods in general and Smoothed Particle Hydrodynamics (SPH) in particular. PySPH allows a user to define a complete SPH simulation using pure Python. High-performance code is generated from this high-level Python code and executed on either multiple cores, or on GPUs, seamlessly. It also supports distributed execution using MPI. PySPH supports a wide variety of SPH schemes and formulations. These include, incompressible and compressible fluid flow, elastic dynamics, rigid body dynamics, shallow water equations, and other problems. PySPH supports a variety of boundary conditions including mirror, periodic, solid wall, and inlet/outlet boundary conditions. The package is written to facilitate reuse and reproducibility. This article discusses the overall design of PySPH and demonstrates many of its features. 
Several example results are shown to demonstrate the range of features that PySPH provides.", acknowledgement = ack-nhfb, articleno = "34", fjournal = "ACM Transactions on Mathematical Software (TOMS)", journal-URL = "https://dl.acm.org/loi/toms", } @Article{Ramroach:2021:ADP, author = "Sterling Ramroach and Ajay Joshi", title = "Accelerating Data-Parallel Neural Network Training with Weighted-Averaging Reparameterisation", journal = j-PARALLEL-PROCESS-LETT, volume = "31", number = "02", pages = "??--??", month = jun, year = "2021", CODEN = "PPLTEE", DOI = "https://doi.org/10.1142/S0129626421500092", ISSN = "0129-6264 (print), 1793-642X (electronic)", ISSN-L = "0129-6264", bibdate = "Thu Feb 17 06:50:36 MST 2022", bibsource = "http://ejournals.wspc.com.sg/ppl/; https://www.math.utah.edu/pub/tex/bib/parallelprocesslett.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://www.worldscientific.com/doi/10.1142/S0129626421500092", abstract = "Recent advances in artificial intelligence has shown a direct correlation between the performance of a network and the number of hidden layers within the network. The Compute Unified Device Architecture (CUDA) framework facilitates the movement of heavy computation from the CPU to the graphics processing unit (GPU) and is used to accelerate the training of neural networks. In this paper, we consider the problem of data-parallel neural network training. We compare the performance of training the same neural network on the GPU with and without data parallelism. When data parallelism is used, we compare with both the conventional averaging of coefficients and our proposed method. We set out to show that not all sub-networks are equal and thus, should not be treated as equals when normalising weight vectors.
The proposed method achieved state of the art accuracy faster than conventional training along with better classification performance in some cases.", acknowledgement = ack-nhfb, articleno = "2150009", fjournal = "Parallel Processing Letters", journal-URL = "http://www.worldscientific.com/loi/ppl", } @Article{Reano:2021:RRC, author = "Carlos Rea{\~n}o and Federico Silla", title = "Redesigning the {rCUDA} communication layer for a better adaptation to the underlying hardware", journal = j-CCPE, volume = "33", number = "14", pages = "e5481:1--e5481:??", day = "25", month = jul, year = "2021", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.5481", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Tue Feb 22 09:49:55 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, ajournal = "Concurrency Computat., Pract. Exper.", fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "19 August 2019", } @Article{Rundo:2021:CPM, author = "Leonardo Rundo and Andrea Tangherloni and Marco S. Nobile", title = "A {CUDA}-powered method for the feature extraction and unsupervised analysis of medical images", journal = j-J-SUPERCOMPUTING, volume = "77", number = "8", pages = "8514--8531", month = aug, year = "2021", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-020-03565-8", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Mon Feb 28 16:44:32 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://link.springer.com/article/10.1007/s11227-020-03565-8", acknowledgement = ack-nhfb, ajournal = "J. 
Supercomputing", fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Article{Schuchart:2021:CBC, author = "Joseph Schuchart and Philipp Samfass and Christoph Niethammer and Jos{\'e} Gracia and George Bosilca", title = "Callback-based completion notification using {MPI} Continuations", journal = j-PARALLEL-COMPUTING, volume = "106", number = "??", pages = "??--??", month = sep, year = "2021", CODEN = "PACOEJ", DOI = "https://doi.org/10.1016/j.parco.2021.102793", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Fri Feb 18 10:07:17 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819121000466", acknowledgement = ack-nhfb, articleno = "102793", fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Schwarzrock:2021:RNI, author = "J. Schwarzrock and C. C. {de Oliveira} and M. Ritt and A. F. Lorenzon and A. C. S. 
Beck", title = "A Runtime and Non-Intrusive Approach to Optimize {EDP} by Tuning Threads and {CPU} Frequency for {OpenMP} Applications", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "32", number = "7", pages = "1713--1724", year = "2021", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/TPDS.2020.3046537", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Fri Mar 19 06:51:50 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "IEEE Transactions on Parallel and Distributed Systems", journal-URL = "https://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=71", } @Article{Sojoodi:2021:IGG, author = "Amir Hossein Sojoodi and Majid Salimi Beni and Farshad Khunjush", title = "{Ignite-GPU}: a {GPU}-enabled in-memory computing architecture on clusters", journal = j-J-SUPERCOMPUTING, volume = "77", number = "3", pages = "3165--3192", month = mar, year = "2021", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-020-03390-z", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Fri May 14 09:19:59 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://link.springer.com/article/10.1007/s11227-020-03390-z", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", online-date = "Published: 27 July 2020 Pages: 3165 - 3192", } @Article{Spiliotis:2021:PCD, author = "Iraklis M. Spiliotis and Charalampos Sitaridis and Michael P.
Bekakos", title = "Parallel Computation of Discrete Orthogonal Moment on Block Represented Images Using {OpenMP}", journal = j-INT-J-PARALLEL-PROG, volume = "49", number = "3", pages = "440--462", month = jun, year = "2021", CODEN = "IJPPE5", DOI = "https://doi.org/10.1007/s10766-021-00713-2", ISSN = "0885-7458 (print), 1573-7640 (electronic)", ISSN-L = "0885-7458", bibdate = "Fri May 14 08:58:34 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://link.springer.com/article/10.1007/s10766-021-00713-2", acknowledgement = ack-nhfb, fjournal = "International Journal of Parallel Programming", journal-URL = "http://link.springer.com/journal/10766", online-date = "Published: 15 April 2021 Pages: 440 - 462", } @Article{Sun:2021:ACW, author = "J. Sun and N. Guan and J. Sun and X. Zhang and Y. Chi and F. Li", title = "Algorithms for Computing the {WCRT} Bound of {OpenMP} Task Systems With Conditional Branches", journal = j-IEEE-TRANS-COMPUT, volume = "70", number = "1", pages = "57--71", month = jan, year = "2021", CODEN = "ITCOB4", DOI = "https://doi.org/10.1109/TC.2020.2984502", ISSN = "0018-9340 (print), 1557-9956 (electronic)", ISSN-L = "0018-9340", bibdate = "Thu Dec 17 19:35:03 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranscomput2020.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "IEEE Transactions on Computers", journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=12", } @Article{Taft:2021:LMA, author = "S. 
Tucker Taft", title = "A Layered Mapping of {Ada 202X} to {OpenMP}", journal = j-SIGADA-LETTERS, volume = "40", number = "2", pages = "55--58", month = apr, year = "2021", CODEN = "AALEE5", DOI = "https://doi.org/10.1145/3463478.3463482", ISSN = "1094-3641 (print), 1557-9476 (electronic)", ISSN-L = "0736-721X", bibdate = "Mon Jun 28 15:50:16 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/sigada.bib", URL = "https://dl.acm.org/doi/10.1145/3463478.3463482", abstract = "The OpenMP specification defines a set of compiler directives, library routines, and environment variables that together represent the OpenMP Application Programming Interface, and is currently defined for C, C++, and Fortran. The forthcoming version of Ada, currently dubbed Ada 202X, includes lightweight parallelism features, in particular parallel blocks and parallel loops. All versions of Ada, since its inception in 1983, have included ``tasking,'' which corresponds to what are traditionally considered ``heavyweight'' parallelism features, or simply ``concurrency'' features. Ada ``tasks'' typically map to what are called ``kernel threads,'' in that the operating system manages them and schedules them. However, one of the goals of lightweight parallelism is to reduce overhead by doing more of the management outside the kernel of the operating system, using a light-weight-thread (LWT) scheduler. 
The OpenMP library routines support both levels of threading, but for Ada 202X, the main interest is in making use of OpenMP for its lightweight thread scheduling capabilities.", acknowledgement = ack-nhfb, fjournal = "ACM SIGADA Ada Letters", journal-URL = "http://portal.acm.org/citation.cfm?id=J32", } @Article{Takizawa:2021:OLO, author = "Hiroyuki Takizawa and Shinji Shiotsuki and Naoki Ebata and Ryusuke Egawa", title = "{OpenCL}-like offloading with metaprogramming for {SX}-Aurora {TSUBASA}", journal = j-PARALLEL-COMPUTING, volume = "102", number = "??", pages = "Article 102754", month = may, year = "2021", CODEN = "PACOEJ", DOI = "https://doi.org/10.1016/j.parco.2021.102754", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Mon Mar 29 11:36:03 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819121000144", acknowledgement = ack-nhfb, fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Tanaka:2021:NRP, author = "Ushio Tanaka and Masami Saga and Junji Nakano", title = "\pkg{NScluster}: An {R} Package for Maximum Palm Likelihood Estimation for Cluster Point Process Models Using {OpenMP}", journal = j-J-STAT-SOFT, volume = "98", number = "??", pages = "??--??", month = "????", year = "2021", CODEN = "JSSOBK", DOI = "https://doi.org/10.18637/jss.v98.i06", ISSN = "1548-7660", ISSN-L = "1548-7660", bibdate = "Fri Jul 23 08:12:54 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jstatsoft.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://www.jstatsoft.org/index.php/jss/article/view/v098i06; https://www.jstatsoft.org/index.php/jss/article/view/v098i06/v98i06.pdf", acknowledgement = ack-nhfb, journal-URL = "http://www.jstatsoft.org/", } @Article{Traff:2021:MCC, author = "Jesper Larsson Tr{\"a}ff and 
Sascha Hunold and Guillaume Mercier and Daniel J. Holmes", title = "{MPI} collective communication through a single set of interfaces: a case for orthogonality", journal = j-PARALLEL-COMPUTING, volume = "107", number = "??", pages = "??--??", month = oct, year = "2021", CODEN = "PACOEJ", DOI = "https://doi.org/10.1016/j.parco.2021.102826", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Fri Feb 18 10:07:17 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819121000740", acknowledgement = ack-nhfb, articleno = "102826", fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Wang:2021:ATD, author = "Farui Wang and Weizhe Zhang and Zheng Wang", title = "Automatic translation of data parallel programs for heterogeneous parallelism through {OpenMP} offloading", journal = j-J-SUPERCOMPUTING, volume = "77", number = "5", pages = "4957--4987", month = may, year = "2021", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-020-03452-2", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Fri May 14 09:20:00 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://link.springer.com/article/10.1007/s11227-020-03452-2", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", online-date = "Published: 29 October 2020 Pages: 4957 - 4987", } @Article{Wang:2021:PBD, author = "Shao-Chung Wang and Lin-Ya Yu and Li-An Her and Yuan-Shin Hwang and Jenq-Kuen Lee", title = "Pointer-Based Divergence Analysis for {OpenCL 2.0} Programs", journal = j-TOPC, volume = "8", number = "4", pages = "20:1--20:23", month = dec, year = "2021", CODEN = "????", DOI =
"https://doi.org/10.1145/3470644", ISSN = "2329-4949 (print), 2329-4957 (electronic)", ISSN-L = "2329-4949", bibdate = "Fri Dec 10 10:52:35 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/topc.bib", URL = "https://dl.acm.org/doi/10.1145/3470644", abstract = "A modern GPU is designed with many large thread groups to achieve a high throughput and performance. Within these groups, the threads are grouped into fixed-size SIMD batches in which the same instruction is applied to vectors of data in a lockstep. This \ldots{}", acknowledgement = ack-nhfb, articleno = "20", fjournal = "ACM Transactions on Parallel Computing", journal-URL = "https://dl.acm.org/loi/topc", } @Article{Wang:2021:PBS, author = "Y. Wang and X. Jiang and N. Guan and Z. Guo and X. Liu and W. Yi", title = "Partitioning-Based Scheduling of {OpenMP} Task Systems With Tied Tasks", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "32", number = "6", pages = "1322--1339", year = "2021", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/TPDS.2020.3048373", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Fri Mar 19 06:51:50 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "IEEE Transactions on Parallel and Distributed Systems", journal-URL = "https://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=71", } @Article{Xue:2021:IFG, author = "Weicheng Xue and Charles W. Jackson and Christopher J.
Roy", title = "An improved framework of {GPU} computing for {CFD} applications on structured grids using {OpenACC}", journal = j-J-PAR-DIST-COMP, volume = "156", number = "??", pages = "64--85", month = oct, year = "2021", CODEN = "JPDCER", DOI = "https://doi.org/10.1016/j.jpdc.2021.05.010", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Thu Feb 10 06:39:19 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0743731521001155", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Article{Xue:2021:MGP, author = "Weicheng Xue and Christopher J. Roy", title = "Multi-{GPU} performance optimization of a computational fluid dynamics code using {OpenACC}", journal = j-CCPE, volume = "33", number = "5", pages = "e6036:1--e6036:??", day = "10", month = mar, year = "2021", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.6036", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Tue May 18 08:31:21 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, ajournal = "Concurrency Computat., Pract. Exper.", fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "28 September 2020", } @Article{Yang:2021:HMC, author = "Sheng-Chun Yang and Yong-Lei Wang", title = "A hybrid {MPI-CUDA} approach for nonequispaced discrete {Fourier} transformation", journal = j-COMP-PHYS-COMM, volume = "258", number = "??", pages = "Article 107513", month = jan, year = "2021", CODEN = "CPHCBZ", DOI = "https://doi.org/10.1016/j.cpc.2020.107513", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Sat Mar 13 08:21:40
MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2020.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465520302393", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Yang:2021:SSG, author = "Lishan Yang and Bin Nie and Adwait Jog and Evgenia Smirni", title = "{SUGAR}: Speeding Up {GPGPU} Application Resilience Estimation with Input Sizing", journal = j-POMACS, volume = "5", number = "1", pages = "01:1--01:29", month = feb, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3447375", ISSN = "2476-1249", ISSN-L = "2476-1249", bibdate = "Mon Mar 29 10:31:36 MDT 2021", bibsource = "http://portal.acm.org/; https://www.math.utah.edu/pub/tex/bib/pomacs.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://dl.acm.org/doi/10.1145/3447375", abstract = "As Graphics Processing Units (GPUs) are becoming a de facto solution for accelerating a wide range of applications, their reliable operation is becoming increasingly important.
One of the major challenges in the domain of GPU reliability is to \ldots{}", acknowledgement = ack-nhfb, articleno = "01", fjournal = "Proceedings of the ACM on Measurement and Analysis of Computing Systems (POMACS)", journal-URL = "https://dl.acm.org/loi/pomacs", } @Article{Zhang:2021:IRP, author = "Jingrong Zhang and Zihao Wang and Zhiyong Liu and Fa Zhang", title = "Improve the Resolution and Parallel Performance of the Three-Dimensional Refine Algorithm in {RELION} Using {CUDA} and {MPI}", journal = j-TCBB, volume = "18", number = "2", pages = "583--595", month = mar, year = "2021", CODEN = "ITCBCY", DOI = "https://doi.org/10.1109/TCBB.2019.2929171", ISSN = "1545-5963 (print), 1557-9964 (electronic)", ISSN-L = "1545-5963", bibdate = "Fri Mar 4 08:29:16 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/tcbb.bib", URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2929171", abstract = "In cryo-electron microscopy, RELION is a powerful tool for high-resolution reconstruction. Due to the complicated imaging procedure and the heterogeneity of particles, some of the selected particle images offer more disturbing information than others. 
\ldots{}", acknowledgement = ack-nhfb, fjournal = "IEEE/ACM Transactions on Computational Biology and Bioinformatics", journal-URL = "https://dl.acm.org/loi/tcbb", } @Article{Zhou:2021:HPG, author = "Chao Zhou", title = "High Performance Graph Data Imputation on Multiple {GPUs}", journal = j-FUTURE-INTERNET, volume = "13", number = "2", pages = "36", day = "31", month = jan, year = "2021", CODEN = "????", DOI = "https://doi.org/10.3390/fi13020036", ISSN = "1999-5903", bibdate = "Fri Feb 26 10:54:58 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/future-internet.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://www.mdpi.com/1999-5903/13/2/36", abstract = "In real applications, massive data with graph structures are often incomplete due to various restrictions. Therefore, graph data imputation algorithms have been widely used in the fields of social networks, sensor networks, and MRI to solve the graph data completion problem. To keep the data relevant, a data structure is represented by a graph-tensor, in which each matrix is the vertex value of a weighted graph. The convolutional imputation algorithm has been proposed to solve the low-rank graph-tensor completion problem that some data matrices are entirely unobserved. However, this data imputation algorithm has limited application scope because it is compute-intensive and low-performance on CPU. In this paper, we propose a scheme to perform the convolutional imputation algorithm with higher time performance on GPUs (Graphics Processing Units) by exploiting multi-core GPUs of CUDA architecture. We propose optimization strategies to achieve coalesced memory access for graph Fourier transform (GFT) computation and improve the utilization of GPU SM resources for singular value decomposition (SVD) computation. Furthermore, we design a scheme to extend the GPU-optimized implementation to multiple GPUs for large-scale computing. 
Experimental results show that the GPU implementation is both fast and accurate. On synthetic data of varying sizes, the GPU-optimized implementation running on a single Quadro RTX6000 GPU achieves up to 60.50$ \times $ speedups over the GPU-baseline implementation. The multi-GPU implementation achieves up to 1.81$ \times $ speedups on two GPUs versus the GPU-optimized implementation on a single GPU. On the ego-Facebook dataset, the GPU-optimized implementation achieves up to 77.88$ \times $ speedups over the GPU-baseline implementation. Meanwhile, the GPU implementation and the CPU implementation achieve similar, low recovery errors.", acknowledgement = ack-nhfb, journal-URL = "https://www.mdpi.com/1999-5903/", remark = "Section Smart System Infrastructure and Applications.", } @Article{Zhu:2021:POT, author = "Zijie Zhu and Yongxian Wang and Xinghua Cheng", title = "Parallel optimization of three-dimensional wedge-shaped underwater acoustic propagation based on {MPI + OpenMP} hybrid programming model", journal = j-J-SUPERCOMPUTING, volume = "77", number = "5", pages = "4988--5018", month = may, year = "2021", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-020-03466-w", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Fri May 14 09:20:00 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://link.springer.com/article/10.1007/s11227-020-03466-w", acknowledgement = ack-nhfb, fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", online-date = "Published: 29 October 2020 Pages: 4988 - 5018", } @Article{Agathos:2022:CAA, author = "Spiros N. Agathos and Vassilios V. Dimakopoulos and Ilias K. 
Kasmeridis", title = "Compiler-assisted, adaptive runtime system for the support of {OpenMP} in embedded multicores", journal = j-PARALLEL-COMPUTING, volume = "110", number = "??", pages = "??--??", month = may, year = "2022", CODEN = "PACOEJ", DOI = "https://doi.org/10.1016/j.parco.2022.102895", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Fri Feb 18 10:07:18 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819122000035", acknowledgement = ack-nhfb, articleno = "102895", fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Bak:2022:OAE, author = "Seonmyeong Bak and Colleen Bertoni and Swen Boehm and Reuben Budiardja and Barbara M. Chapman and Johannes Doerfert and Markus Eisenbach and Hal Finkel and Oscar Hernandez and Joseph Huber and Shintaro Iwasaki and Vivek Kale and Paul R. C. Kent and JaeHyuk Kwack and Meifeng Lin and Piotr Luszczek and Ye Luo and Buu Pham and Swaroop Pophale and Kiran Ravikumar and Vivek Sarkar and Thomas Scogland and Shilei Tian and P. K. 
Yeung", title = "{OpenMP} application experiences: Porting to accelerated nodes", journal = j-PARALLEL-COMPUTING, volume = "109", number = "??", pages = "??--??", month = mar, year = "2022", CODEN = "PACOEJ", DOI = "https://doi.org/10.1016/j.parco.2021.102856", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Fri Feb 18 10:07:18 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819121001009", acknowledgement = ack-nhfb, articleno = "102856", fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Barai:2022:PMP, author = "Atanu Barai and Yehia Arafa and Stephan Eidenbenz", title = "\pkg{PPT-Multicore}: performance prediction of {OpenMP} applications using reuse profiles and analytical modeling", journal = j-J-SUPERCOMPUTING, volume = "78", number = "2", pages = "2354--2385", month = feb, year = "2022", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-021-03949-4", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Mon Feb 28 16:44:34 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://link.springer.com/article/10.1007/s11227-021-03949-4", acknowledgement = ack-nhfb, ajournal = "J. 
Supercomputing", fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Article{Bouhrour:2022:TLC, author = "Stephane Bouhrour and Thibaut Pepin and Julien Jaeger", title = "Towards leveraging collective performance with the support of {MPI 4.0} features in {MPC}", journal = j-PARALLEL-COMPUTING, volume = "109", number = "??", pages = "??--??", month = mar, year = "2022", CODEN = "PACOEJ", DOI = "https://doi.org/10.1016/j.parco.2021.102860", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Fri Feb 18 10:07:18 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819121001034", acknowledgement = ack-nhfb, articleno = "102860", fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Delmas:2022:MGI, author = "Vincent Delmas and Azzedine Soula{\"\i}mani", title = "Multi-{GPU} implementation of a time-explicit finite volume solver using {CUDA} and a {CUDA}-Aware version of {OpenMPI} with application to shallow water flows", journal = j-COMP-PHYS-COMM, volume = "271", number = "??", pages = "Article 108190", month = feb, year = "2022", CODEN = "CPHCBZ", DOI = "https://doi.org/10.1016/j.cpc.2021.108190", ISSN = "0010-4655 (print), 1879-2944 (electronic)", ISSN-L = "0010-4655", bibdate = "Mon Dec 20 16:41:52 MST 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/compphyscomm2020.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0010465521003027", acknowledgement = ack-nhfb, fjournal = "Computer Physics Communications", journal-URL = "http://www.sciencedirect.com/science/journal/00104655", } @Article{Dichev:2022:PLR, author = "Kiril Dichev and Daniele {De Sensi} and Dimitrios S. Nikolopoulos and Kirk W. 
Cameron and Ivor Spence", title = "{Power Log n Roll}: Power-Efficient Localized Rollback for {MPI} Applications Using Message Logging Protocols", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "33", number = "6", pages = "1276--1288", month = jun, year = "2022", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/TPDS.2021.3107745", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Tue Nov 9 11:11:37 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "IEEE Transactions on Parallel and Distributed Systems", journal-URL = "https://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=71", } @Article{Du:2022:MPO, author = "Qi Du and Hui Huang", title = "{MPI} parameter optimization during debugging phase of {HPC} system", journal = j-J-SUPERCOMPUTING, volume = "78", number = "2", pages = "1696--1711", month = feb, year = "2022", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-021-03939-6", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Mon Feb 28 16:44:34 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://link.springer.com/article/10.1007/s11227-021-03939-6", acknowledgement = ack-nhfb, ajournal = "J. Supercomputing", fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Article{Gonzalez-Dominguez:2022:MDP, author = "Jorge Gonz{\'a}lez-Dom{\'\i}nguez and Jos{\'e} M. Mart{\'\i}n-Mart{\'\i}nez and Roberto R. 
Exp{\'o}sito", title = "\pkg{MPI-dot2dot}: A parallel tool to find {DNA} tandem repeats on multicore clusters", journal = j-J-SUPERCOMPUTING, volume = "78", number = "3", pages = "4217--4235", month = feb, year = "2022", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-021-04025-7", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Mon Feb 28 16:44:34 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://link.springer.com/article/10.1007/s11227-021-04025-7", acknowledgement = ack-nhfb, ajournal = "J. Supercomputing", fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Article{Haghi:2022:RSH, author = "Pouya Haghi and Anqi Guo and Qingqing Xiong and Chen Yang and Tong Geng and Justin T. Broaddus and Ryan Marshall and Derek Schafer and Anthony Skjellum and Martin C. Herbordt", title = "Reconfigurable switches for high performance and flexible {MPI} collectives", journal = j-CCPE, volume = "34", number = "6", pages = "e6769:1--e6769:??", day = "10", month = mar, year = "2022", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.6769", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Tue Feb 22 09:50:09 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, ajournal = "Concurrency Computat., Pract. 
Exper.", fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "12 December 2021", } @Article{Huckelheim:2022:SSA, author = "Jan H{\"u}ckelheim and Laurent Hasco{\"e}t", title = "Source-to-Source Automatic Differentiation of {OpenMP} Parallel Loops", journal = j-TOMS, volume = "48", number = "1", pages = "7:1--7:32", month = mar, year = "2022", CODEN = "ACMSCU", DOI = "https://doi.org/10.1145/3472796", ISSN = "0098-3500 (print), 1557-7295 (electronic)", ISSN-L = "0098-3500", bibdate = "Thu Feb 17 08:00:57 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/toms.bib", URL = "https://dl.acm.org/doi/10.1145/3472796", abstract = "This article presents our work toward correct and efficient automatic differentiation of OpenMP parallel worksharing loops in forward and reverse mode. Automatic differentiation is a method to obtain gradients of numerical programs, which are crucial in optimization, uncertainty quantification, and machine learning. The computational cost to compute gradients is a common bottleneck in practice. For applications that are parallelized for multicore CPUs or GPUs using OpenMP, one also wishes to compute the gradients in parallel. We propose a framework to reason about the correctness of the generated derivative code, from which we justify our OpenMP extension to the differentiation model. We implement this model in the automatic differentiation tool Tapenade and present test cases that are differentiated following our extended differentiation procedure. 
Performance of the generated derivative programs in forward and reverse mode is better than sequential, although our reverse mode often scales worse than the input programs.", acknowledgement = ack-nhfb, articleno = "7", fjournal = "ACM Transactions on Mathematical Software (TOMS)", journal-URL = "https://dl.acm.org/loi/toms", } @Article{Jani:2022:HST, author = "Kunal Jani and Ankit Kumar and Ronak Nahata", title = "\pkg{Hpcfolder}: a simple tool used to parallelize algorithms using the message passing interface {(MPI)}", journal = j-J-SUPERCOMPUTING, volume = "78", number = "1", pages = "258--278", month = jan, year = "2022", CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-021-03896-0", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Mon Feb 28 16:44:33 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://link.springer.com/article/10.1007/s11227-021-03896-0", acknowledgement = ack-nhfb, ajournal = "J. Supercomputing", fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Article{Janssen:2022:GPU, author = "Dylan M. Janssen and Wayne Pullan and Alan Wee-Chung Liew", title = "Graphics processing unit acceleration of the island model genetic algorithm using the {CUDA} programming platform", journal = j-CCPE, volume = "34", number = "2", pages = "e6286:1--e6286:??", day = "25", month = jan, year = "2022", CODEN = "CCPEBO", DOI = "https://doi.org/10.1002/cpe.6286", ISSN = "1532-0626 (print), 1532-0634 (electronic)", ISSN-L = "1532-0626", bibdate = "Tue Feb 22 09:50:05 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/ccpe.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, ajournal = "Concurrency Computat., Pract. 
Exper.", fjournal = "Concurrency and Computation: Practice and Experience", journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626", onlinedate = "31 March 2021", } @Article{Li:2022:CDC, author = "Wentao Li and Zhiwen Chen and Xin He and Guoyun Duan and Jianhua Sun and Hao Chen", title = "{CVFuzz}: Detecting complexity vulnerabilities in {OpenCL} kernels via automated pathological input generation", journal = j-FUT-GEN-COMP-SYS, volume = "127", number = "??", pages = "384--395", month = feb, year = "2022", CODEN = "FGSEVI", DOI = "https://doi.org/10.1016/j.future.2021.09.006", ISSN = "0167-739X (print), 1872-7115 (electronic)", ISSN-L = "0167-739X", bibdate = "Wed Feb 9 09:07:25 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/futgencompsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167739X21003526", acknowledgement = ack-nhfb, fjournal = "Future Generation Computer Systems", journal-URL = "http://www.sciencedirect.com/science/journal/0167739X", } @Article{Meyer:2022:DFA, author = "Marius Meyer and Tobias Kenter and Christian Plessl", title = "In-depth {FPGA} accelerator performance evaluation with single node benchmarks from the {HPC} challenge benchmark suite for {Intel} and {Xilinx} {FPGAs} using {OpenCL}", journal = j-J-PAR-DIST-COMP, volume = "160", number = "??", pages = "79--89", month = feb, year = "2022", CODEN = "JPDCER", DOI = "https://doi.org/10.1016/j.jpdc.2021.10.007", ISSN = "0743-7315 (print), 1096-0848 (electronic)", ISSN-L = "0743-7315", bibdate = "Thu Feb 10 06:39:24 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0743731521002057", acknowledgement = ack-nhfb, fjournal = "Journal of Parallel and Distributed Computing", journal-URL = "http://www.sciencedirect.com/science/journal/07437315", } @Article{Protze:2022:MDT, 
author = "Joachim Protze and Marc-Andr{\'e} Hermanns and Matthias S. M{\"u}ller and Van Man Nguyen and Julien Jaeger and Emmanuelle Saillard and Patrick Carribault and Denis Barthou", title = "{MPI} detach --- Towards automatic asynchronous local completion", journal = j-PARALLEL-COMPUTING, volume = "109", number = "??", pages = "??--??", month = mar, year = "2022", CODEN = "PACOEJ", DOI = "https://doi.org/10.1016/j.parco.2021.102859", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Fri Feb 18 10:07:18 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819121001022", acknowledgement = ack-nhfb, articleno = "102859", fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } @Article{Raskovalov:2022:AMD, author = "Anton Raskovalov and Platon Surkov", title = "{azTotMD 2.0}: {Molecular} dynamics with the radiative thermostat and temperature-dependent force field ({CUDA} version)", journal = j-SOFTWAREX, volume = "17", number = "??", pages = "??--??", month = jan, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1016/j.softx.2022.100995", ISSN = "2352-7110", ISSN-L = "2352-7110", bibdate = "Mon Feb 28 10:41:25 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/softwarex.bib", URL = "http://www.sciencedirect.com/science/article/pii/S2352711022000127", acknowledgement = ack-nhfb, articleno = "100995", fjournal = "SoftwareX", journal-URL = "https://www.sciencedirect.com/journal/softwarex/issues", } @Article{Rocco:2022:LFR, author = "Roberto Rocco and Davide Gadioli and Gianluca Palermo", title = "\pkg{Legio}: fault resiliency for embarrassingly parallel {MPI} applications", journal = j-J-SUPERCOMPUTING, volume = "78", number = "2", pages = "2175--2195", month = feb, year = "2022", 
CODEN = "JOSUED", DOI = "https://doi.org/10.1007/s11227-021-03951-w", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542", bibdate = "Mon Feb 28 16:44:34 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/jsuper.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://link.springer.com/article/10.1007/s11227-021-03951-w", acknowledgement = ack-nhfb, ajournal = "J. Supercomputing", fjournal = "The Journal of Supercomputing", journal-URL = "http://link.springer.com/journal/11227", } @Article{Smith:2022:PAM, author = "Matthew Smith and Arjen Tamerus and Phil Hasnip", title = "Portable Acceleration of Materials Modeling Software: {CASTEP}, {GPUs}, and {OpenACC}", journal = j-COMPUT-SCI-ENG, volume = "24", number = "1", pages = "46--55", month = jan # "\slash " # feb, year = "2022", CODEN = "CSENFA", DOI = "https://doi.org/10.1109/MCSE.2022.3141714", ISSN = "1521-9615 (print), 1558-366X (electronic)", ISSN-L = "1521-9615", bibdate = "Thu Mar 17 07:23:22 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/computscieng.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "Computing in Science and Engineering", journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5992", } @Article{Zhao:2022:SGM, author = "Chen Zhao and Wu Gao and Feiping Nie and Huiyang Zhou", title = "A Survey of {GPU} Multitasking Methods Supported by Hardware Architecture", journal = j-IEEE-TRANS-PAR-DIST-SYS, volume = "33", number = "6", pages = "1451--1463", month = jun, year = "2022", CODEN = "ITDSEO", DOI = "https://doi.org/10.1109/TPDS.2021.3115630", ISSN = "1045-9219 (print), 1558-2183 (electronic)", ISSN-L = "1045-9219", bibdate = "Tue Nov 9 11:11:37 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, fjournal = "IEEE Transactions on Parallel and Distributed Systems", journal-URL = 
"https://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=71", } @Article{Zhong:2022:ULV, author = "Dong Zhong and Qinglei Cao and George Bosilca and Jack Dongarra", title = "Using long vector extensions for {MPI} reductions", journal = j-PARALLEL-COMPUTING, volume = "109", number = "??", pages = "??--??", month = mar, year = "2022", CODEN = "PACOEJ", DOI = "https://doi.org/10.1016/j.parco.2021.102871", ISSN = "0167-8191 (print), 1872-7336 (electronic)", ISSN-L = "0167-8191", bibdate = "Fri Feb 18 10:07:18 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.sciencedirect.com/science/article/pii/S0167819121001137", acknowledgement = ack-nhfb, articleno = "102871", fjournal = "Parallel Computing", journal-URL = "http://www.sciencedirect.com/science/journal/01678191", } %%% ==================================================================== %%% Cross-referenced entries must come last; entries are sorted by year, %%% and then by citation label, with `bibsort --byyear': @Proceedings{Anonymous:1989:PFC, editor = "Anonymous", booktitle = "{Proceedings of the Fourth Conference on Hypercubes, Concurrent Computers and Applications, 6--8 March 1989, Monterey, CA, USA}", title = "{Proceedings of the Fourth Conference on Hypercubes, Concurrent Computers and Applications, 6--8 March 1989, Monterey, CA, USA}", publisher = "Golden Gate Enterprises", address = "Los Altos, CA, USA", pages = "xiv + 1362", year = "1989", ISBN = "", ISBN-13 = "", LCCN = "QA76.5.C619215 1989", bibdate = "Sun Dec 22 10:16:53 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "Two volumes", acknowledgement = ack-nhfb, confsponsor = "D.O.E.; US Air Force; NASA", } @Proceedings{ACM:1990:PAC, editor = "{ACM}", booktitle = "{Proceedings of the 1990 ACM Conference on LISP and Functional Programming: papers presented at the conference, Nice, France, June 27--29, 1990}", title = 
"{Proceedings of the 1990 ACM Conference on LISP and Functional Programming: papers presented at the conference, Nice, France, June 27--29, 1990}", publisher = pub-ACM, address = pub-ACM:adr, pages = "viii + 348", year = "1990", ISBN = "0-89791-368-X", ISBN-13 = "978-0-89791-368-3", LCCN = "QA 76.73 L23 A24 1990", bibdate = "Wed Apr 16 07:21:40 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "ACM order no. 552900.", acknowledgement = ack-nhfb, confsponsor = "ACM", } @Proceedings{Bhavsar:1991:SSJ, editor = "Virendrakumar Chhabulal Bhavsar and Uday Govinddas Gujar", booktitle = "{Supercomputing Symposium '91, June 3--5, 1991, Fredericton, NB, Canada: symposium proceedings}", title = "{Supercomputing Symposium '91, June 3--5, 1991, Fredericton, NB, Canada: symposium proceedings}", publisher = "University of New Brunswick Press", address = "Fredericton, NB, Canada", pages = "x + 544", year = "1991", ISBN = "0-920114-14-8", ISBN-13 = "978-0-920114-14-8", LCCN = "QA76.88.S87 1991", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, corpsource = "Centre for Dev. of Adv. Comput., Bangalore, India", pubcountry = "Canada", treatment = "P Practical", } @Proceedings{Durand:1991:HPC, editor = "M. Durand and F. 
{El Dabaghi}", booktitle = "{High performance computing, II: proceedings of the Second Symposium on High Performance Computing, Montpellier, France, 7--9 October, 1991}", title = "{High performance computing, II: proceedings of the Second Symposium on High Performance Computing, Montpellier, France, 7--9 October, 1991}", publisher = pub-NH, address = pub-NH:adr, pages = "xii + 673", year = "1991", ISBN = "0-444-89224-9", ISBN-13 = "978-0-444-89224-9", LCCN = "QA75.5.I585 1991", bibdate = "Sun Dec 22 10:17:16 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, numericalindex = "Computer speed 2.0E+07 to 6.0E+07 FLOPS", pubcountry = "Netherlands", } @Proceedings{IEEE:1991:PSA, editor = "{IEEE}", key = "Supercomputing '91", booktitle = "{Proceedings, Supercomputing '91: Albuquerque, New Mexico, November 18--22, 1991}", title = "{Proceedings, Supercomputing '91: Albuquerque, New Mexico, November 18--22, 1991}", publisher = pub-IEEE, address = pub-IEEE:adr, pages = "xxiii + 917", year = "1991", ISBN = "0-8186-9158-1 (IEEE: case), 0-8186-2158-3 (IEEE: paper), 0-8186-6158-5 (IEEE: microfiche), 0-89791-459-7 (ACM)", ISBN-13 = "978-0-8186-9158-4 (IEEE: case), 978-0-8186-2158-1 (IEEE: paper), 978-0-8186-6158-7 (IEEE: microfiche), 978-0-89791-459-8 (ACM)", LCCN = "QA76.5 .S894 1991", bibdate = "Mon Jan 15 11:05:59 1996", bibsource = "https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "IEEE catalog no. 91CH3058-5.", } @Proceedings{Stout:1991:SDM, editor = "Quentin F. 
Stout and Michael Joseph Wolfe", booktitle = "{The Sixth Distributed Memory Computing Conference proceedings April 28--May 1, 1991, Portland, Oregon}", title = "{The Sixth Distributed Memory Computing Conference proceedings April 28--May 1, 1991, Portland, Oregon}", publisher = pub-IEEE, address = pub-IEEE:adr, pages = "xx + 736", year = "1991", ISBN = "0-8186-2291-1", ISBN-13 = "978-0-8186-2291-5", LCCN = "QA76.5 .D58 1991", bibdate = "Tue Jan 16 07:21:24 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Proceedings{Anonymous:1992:PSE, editor = "Anonymous", booktitle = "{Proceedings SHARE Europe Anniversary Meeting}", title = "{Proceedings SHARE Europe Anniversary Meeting}", publisher = "SHARE Eur. Assoc", address = "Geneva, Switzerland", pages = "752", year = "1992", bibdate = "Sun Dec 22 10:17:40 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, confdate = "28 Sept.--2 Oct. 1992", conflocation = "Davos, Switzerland", pubcountry = "Switzerland", } @Proceedings{Dongarra:1992:PFS, editor = "J. Dongarra and K. Kennedy and P. Messina and D. C. Sorensen and R. G. Voigt", booktitle = "{Proceedings of the Fifth SIAM Conference on Parallel Processing for Scientific Computing, 25--27 March 1991, Houston, TX, USA}", title = "{Proceedings of the Fifth SIAM Conference on Parallel Processing for Scientific Computing, 25--27 March 1991, Houston, TX, USA}", publisher = pub-SIAM, address = pub-SIAM:adr, pages = "xvii + 648", year = "1992", ISBN = "0-89871-303-X", ISBN-13 = "978-0-89871-303-9", LCCN = "QA76.58.P76 1992", bibdate = "Sun Dec 22 10:17:40 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, confsponsor = "SIAM", } @Proceedings{Evans:1992:PCP, editor = "D. J. Evans and G. R. Joubert and H. 
Liddell", booktitle = "{Parallel computing '91: proceedings of the International Conference on Parallel Computing '91, London, UK, 3--6 September 1991}", title = "{Parallel computing '91: proceedings of the International Conference on Parallel Computing '91, London, UK, 3--6 September 1991}", volume = "4", publisher = pub-NH, address = pub-NH:adr, pages = "xi + 628", year = "1992", ISBN = "0-444-89212-5", ISBN-13 = "978-0-444-89212-6", LCCN = "QA76.58.I545 1991", bibdate = "Sun Dec 22 10:17:16 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = "Advances in parallel computing", acknowledgement = ack-nhfb, confsponsor = "Elsevier Sci. Publishers; Maspar Comput. Corp.; NCUBE; Office Naval Res. Eur. Office; Transtech", numericalindex = "Byte rate 6.0E+06 Byte/s", pubcountry = "Netherlands", } @Proceedings{Ferenczi:1992:AHW, editor = "S. Ferenczi", booktitle = "{1st Austrian-Hungarian Workshop on Transputer Applications. Proceedings}", title = "{1st Austrian-Hungarian Workshop on Transputer Applications. Proceedings}", publisher = "Hungarian Acad. of Sci", address = "Budapest, Hungary", pages = "v + 117", year = "1992", ISBN = "????", ISBN-13 = "????", LCCN = "????", bibdate = "Sun Dec 22 10:17:40 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, confdate = "8--10 Oct. 
1992", conflocation = "Sopron, Hungary", pubcountry = "Hungary", } @Proceedings{IEEE:1992:PSH, editor = "{IEEE}", booktitle = "{Proceedings / Scalable High Performance Computing Conference, SHPCC-92, April 26--29, 1992, Williamsburg, Virginia}", title = "{Proceedings / Scalable High Performance Computing Conference, SHPCC-92, April 26--29, 1992, Williamsburg, Virginia}", publisher = pub-IEEE, address = pub-IEEE:adr, pages = "xiii + 448", year = "1992", ISBN = "0-8186-2775-1", ISBN-13 = "978-0-8186-2775-0", LCCN = "QA76.76.A65S33 1992", bibdate = "Sun Dec 22 10:17:40 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "IEEE catalog no. 92TH0432-5.", acknowledgement = ack-nhfb, confsponsor = "IEEE", } @Proceedings{Russell:1992:CMW, editor = "Thomas F. Russell and others", booktitle = "{Computational methods in water resources IX: Proceedings of the Ninth International Conference on Computational Methods in Water Resources, held at the University of Colorado, Denver, in June 1992}", title = "{Computational methods in water resources IX: Proceedings of the Ninth International Conference on Computational Methods in Water Resources, held at the University of Colorado, Denver, in June 1992}", publisher = pub-ELSAS, address = pub-ELSAS:adr, pages = "various", year = "1992", ISBN = "1-85166-871-3 (set), 1-85312-169-X (set: Computational Mechanics Publications, Southampton), 1-56252-098-9 (set: Computational Mechanics Publications, Boston), 1-85166-791-1 (v. 1: Elsevier Applied Science), 1-85312-197-5 (v. 1: Computational Mechanics Publications, Southampton), 1-56252-123-3 (v. 1: Computational Mechanics Publications, New York), 1-85166-870-5 (v. 2), 1-85312-198-3 (v. 2), 1-56252-124-1 (v. 2)", ISBN-13 = "978-1-85166-871-7 (set), 978-1-85312-169-2 (set: Computational Mechanics Publications, Southampton), 978-1-56252-098-4 (set: Computational Mechanics Publications, Boston), 978-1-85166-791-8 (v. 1: Elsevier Applied Science), 978-1-85312-197-5 (v. 
1: Computational Mechanics Publications, Southampton), 978-1-56252-123-3 (v. 1: Computational Mechanics Publications, New York), 978-1-85166-870-0 (v. 2), 978-1-85312-198-2 (v. 2), 978-1-56252-124-0 (v. 2)", LCCN = "GB656.2.E42 C65 1992 v.1-2 (c1992)", bibdate = "Mon Jan 15 18:04:49 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "Two volumes.", acknowledgement = ack-nhfb, } @Proceedings{SCRI:1992:PWC, key = "SCRI WCC'92", booktitle = "{Proceedings of the Workshop on Cluster Computing}", title = "{Proceedings of the Workshop on Cluster Computing}", publisher = pub-SCRI, address = pub-SCRI:adr, pages = "??", month = dec, year = "1992", ISBN = "????", ISBN-13 = "????", LCCN = "????", bibdate = "Tue Jan 16 07:34:08 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "Proceedings available via anonymous ftp from \path=ftp.scri.fsu.edu= in directory \path=pub/parallel-workshop.92=.", acknowledgement = ack-nhfb, } @Proceedings{Siegel:1992:FFS, editor = "H. J. Siegel", booktitle = "{Frontiers '92, the Fourth Symposium on the Frontiers of Massively Parallel Computation, October 19--21, 1992, McLean, Virginia}", title = "{Frontiers '92, the Fourth Symposium on the Frontiers of Massively Parallel Computation, October 19--21, 1992, McLean, Virginia}", publisher = pub-IEEE, address = pub-IEEE:adr, pages = "xviii + 592", year = "1992", ISBN = "0-8186-2772-7", ISBN-13 = "978-0-8186-2772-9", LCCN = "QA76.58.S95 1992", bibdate = "Sun Dec 22 10:17:40 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "IEEE catalog no. 92CH3185-6.", acknowledgement = ack-nhfb, confsponsor = "IEEE; NASA", } @Proceedings{Siegel:1992:FSF, editor = "H. J. 
Siegel", booktitle = "{The Fourth Symposium on the Frontiers of Massively Parallel Computation: Frontiers '92 / October 19--21, 1992, McLean Virginia}", title = "{The Fourth Symposium on the Frontiers of Massively Parallel Computation: Frontiers '92 / October 19--21, 1992, McLean Virginia}", publisher = pub-IEEE, address = pub-IEEE:adr, pages = "xviii + 592", year = "1992", ISBN = "0-8186-2772-7", ISBN-13 = "978-0-8186-2772-9", LCCN = "QA76.58.S95 1992", bibdate = "Wed Apr 16 07:25:17 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "IEEE catalog number 92CH3185-6.", acknowledgement = ack-nhfb, } @Proceedings{Verkerk:1992:PIC, editor = "C. Verkerk and W. Wojcik", booktitle = "{Proceedings of the International Conference on Computing in High Energy Physics '92, Annecy, France, 21--25 September 1992}", title = "{Proceedings of the International Conference on Computing in High Energy Physics '92, Annecy, France, 21--25 September 1992}", publisher = "CERN", address = "Geneve, Switzerland", pages = "xxiii + 916", year = "1992", ISBN = "92-9083-049-2", ISBN-13 = "978-92-9083-049-8", LCCN = "QC783.3 C65 1992", bibdate = "Sun Dec 22 10:18:08 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "CERN report 92-07.", acknowledgement = ack-nhfb, pubcountry = "Switzerland", } @Proceedings{Anonymous:1993:ATA, editor = "Anonymous", booktitle = "{Automotive technology and automation: Supercomputer applications in the automotive industries: 26th International symposium --- September 1993, Aachen, Germany}", title = "{Automotive technology and automation: Supercomputer applications in the automotive industries: 26th International symposium --- September 1993, Aachen, Germany}", publisher = "Automotive Automation Ltd", address = "Croydon, UK", pages = "????", year = "1993", ISBN = "0-947719-62-8", ISBN-13 = "978-0-947719-62-3", LCCN = "????", bibdate = "Thu Feb 29 17:59:11 MST 1996", bibsource = 
"https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = "ISATA --- Proceedings --- 26th", acknowledgement = ack-nhfb, sponsor = "ISATA. ENEA; Agency: Italy.", } @Proceedings{Anonymous:1993:CDP, editor = "Anonymous", booktitle = "{The commercial dimensions of parallel computing: UNICOM seminar --- April 1993, London}", title = "{The commercial dimensions of parallel computing: UNICOM seminar --- April 1993, London}", publisher = "Unicom Seminars Ltd", address = "????", pages = "????", year = "1993", ISBN = "????", ISBN-13 = "????", LCCN = "????", bibdate = "Thu Feb 29 17:59:11 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Proceedings{Anonymous:1993:ISA, editor = "Anonymous", booktitle = "{International section: Annual conference --- September 1993, Gallipoli, Italy}", title = "{International section: Annual conference --- September 1993, Gallipoli, Italy}", publisher = "AICA", address = "????", pages = "????", year = "1993", ISBN = "????", ISBN-13 = "????", LCCN = "????", bibdate = "Thu Feb 29 17:59:11 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = "Atti del Congresso Annuale --- Associazione Italiana per l'Informatica ed il Calcolo Automatico 1993", acknowledgement = ack-nhfb, sponsor = "Italian Association for Informatics and Automatic Computation.", } @Proceedings{Anonymous:1993:JFI, editor = "Anonymous", booktitle = "{Joint framework for information technology: Technical conference --- March 1993, Keele}", title = "{Joint framework for information technology: Technical conference --- March 1993, Keele}", publisher = "Dept. 
of Trade and Industry, Information and Manufacturing Division", address = "London, UK", pages = "????", year = "1993", ISBN = "????", ISBN-13 = "????", LCCN = "????", bibdate = "Thu Feb 29 17:59:11 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = "JFIT Technical Conference Digest", acknowledgement = ack-nhfb, sponsor = "Great Britain; Department of Trade and Industry. Science and Engineering Research Council.", } @Proceedings{Anonymous:1993:PSE, editor = "Anonymous", booktitle = "{Proceedings. SHARE Europe Anniversary Meeting. Client/Server --- the Promise and the Reality: October 25--28, 1993, the Hague, the Netherlands}", title = "{Proceedings. SHARE Europe Anniversary Meeting. Client/Server --- the Promise and the Reality: October 25--28, 1993, the Hague, the Netherlands}", publisher = "SHARE Europe", address = "Geneva, Switzerland", pages = "xxi + 1002", year = "1993", ISBN = "????", ISBN-13 = "????", ISSN = "0254-6213", LCCN = "????", bibdate = "Wed Apr 16 11:45:17 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Proceedings{Anonymous:1993:SEC, editor = "Anonymous", booktitle = "{Supercomputing Europe '93. Conference Papers}", title = "{Supercomputing Europe '93. Conference Papers}", publisher = "Royal Dutch Fairs", address = "Utrecht, Netherlands", pages = "251", year = "1993", ISBN = "????", ISBN-13 = "????", LCCN = "????", bibdate = "Sun Dec 22 10:17:40 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, classification = "C5220P (Parallel architecture); C5440 (Multiprocessor systems and techniques); C7300 (Natural sciences); C7400 (Engineering)", confdate = "22--24 Feb. 
1993", conflocation = "Utrecht, Netherlands", keywords = "Aerospace applications; High Performance Fortran; Parallel architectures; Parallel software; Scientific applications; Scientific visualisation; Superconducting environments; Workstation clusters", pubcountry = "Netherlands", thesaurus = "Engineering computing; Natural sciences computing; Parallel architectures; Parallel processing; Software engineering", } @Proceedings{Bhargava:1993:PIW, editor = "Bharat Bhargava", booktitle = "{Proceedings of the IEEE Workshop on Advances in Parallel and Distributed Systems, October 6, 1993, Princeton, New Jersey}", title = "{Proceedings of the IEEE Workshop on Advances in Parallel and Distributed Systems, October 6, 1993, Princeton, New Jersey}", publisher = pub-IEEE, address = pub-IEEE:adr, pages = "viii + 170", year = "1993", ISBN = "0-8186-5250-0, 0-8186-5251-9", ISBN-13 = "978-0-8186-5250-9, 978-0-8186-5251-6", LCCN = "QA76.58.I444 1993", bibdate = "Thu Feb 29 17:59:11 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, sponsor = "IEEE. Computer Society. Technical Committee on Distributed Processing.", } @Proceedings{Brebbia:1993:ASE, editor = "C. A. Brebbia and H. Power", booktitle = "{Applications of Supercomputers in Engineering III, 27--29 September 1993, Bath, UK}", title = "{Applications of Supercomputers in Engineering III, 27--29 September 1993, Bath, UK}", publisher = "Computational Mechanics Publications", address = "London, UK", pages = "561", year = "1993", ISBN = "1-85312-236-X", ISBN-13 = "978-1-85312-236-1", LCCN = "TA345.I556 1993", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Proceedings{Gawman:1993:PCT, editor = "Ann Gawman and W. Morven Gentleman and E. Kidd and Per-{\AA}ke Larson and J. 
Slonim",
  booktitle = "{Proceedings CASCON '93: Toronto, Ontario, Canada, 24--28 October 1993}",
  title = "{Proceedings CASCON '93: Toronto, Ontario, Canada, 24--28 October 1993}",
  publisher = "Nat. Res. Council of Canada",
  address = "Ottawa, Ont., Canada",
  pages = "xx + 1180",
  year = "1993",
  ISBN = "????",
  ISBN-13 = "????",
  LCCN = "QA76.76.S64 C378 1993 v.1-2",
  bibdate = "Sun Dec 22 10:18:08 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "Two volumes.",
  acknowledgement = ack-nhfb,
  pubcountry = "Canada",
}

@Proceedings{Grebe:1993:TAS,
  editor = "R. Grebe and J. Hektor and S. C. Hilton and M. R. Jane and P. H. Welch",
  booktitle = "{Transputer applications and systems '93: proceedings of the 1993 World Transputer Congress, 20--22 September 1993, Aachen, Germany}",
  title = "{Transputer applications and systems '93: proceedings of the 1993 World Transputer Congress, 20--22 September 1993, Aachen, Germany}",
  publisher = pub-IOS,
  address = pub-IOS:adr,
  pages = "1317",
  year = "1993",
  ISBN = "90-5199-140-1",
  ISBN-13 = "978-90-5199-140-6",
  LCCN = "????",
  bibdate = "Wed Apr 16 11:39:32 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  confdate = "20--22 Sept. 1993",
  conflocation = "Aachen, Germany",
  pubcountry = "Netherlands",
}

@Proceedings{Hoffmann:1993:PFE,
  editor = "Geerd-R. Hoffmann and Tuomo Kauranne",
  booktitle = "{Proceedings of the Fifth ECMWF Workshop on the Use of Parallel Processors in Meteorology. Parallel Supercomputing in Atmospheric Science}",
  title = "{Proceedings of the Fifth ECMWF Workshop on the Use of Parallel Processors in Meteorology.
Parallel Supercomputing in Atmospheric Science}",
  publisher = pub-WORLD-SCI,
  address = pub-WORLD-SCI:adr,
  pages = "ix + 532",
  year = "1993",
  ISBN = "981-02-1429-4",
  ISBN-13 = "978-981-02-1429-6",
  LCCN = "QA76.58 E354 1992",
  bibdate = "Sun Dec 22 10:18:08 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  confdate = "23--27 Nov. 1992",
  conflocation = "Reading, UK",
  pubcountry = "Singapore",
}

@Proceedings{IEEE:1993:DPC,
  editor = "{IEEE}",
  booktitle = "{Digest of papers: Compcon spring '93, San Francisco, California, February 22--26, 1993}",
  title = "{Digest of papers: Compcon spring '93, San Francisco, California, February 22--26, 1993}",
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = "xv + 609",
  year = "1993",
  ISBN = "0-8186-3400-6",
  ISBN-13 = "978-0-8186-3400-0",
  LCCN = "QA75.5.C58 1993",
  bibdate = "Sun Dec 22 10:18:08 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "IEEE catalog no. 93CH3251-6.",
  acknowledgement = ack-nhfb,
}

@Proceedings{IEEE:1993:PFW,
  editor = "{IEEE}",
  booktitle = "{Proceedings of the Fourth Workshop on Future Trends of Distributed Computing Systems, September 22--24, 1993, Lisbon, Portugal}",
  title = "{Proceedings of the Fourth Workshop on Future Trends of Distributed Computing Systems, September 22--24, 1993, Lisbon, Portugal}",
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = "x + 485",
  year = "1993",
  ISBN = "0-8186-4430-3",
  ISBN-13 = "978-0-8186-4430-6",
  LCCN = "QA76.9.D5I335 1993",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "IEEE catalog no. 93TH0574-4.",
  acknowledgement = ack-nhfb,
  confsponsor = "IEEE Comput. Soc. Tech.
Committee on Distributed Process",
}

@Proceedings{IEEE:1993:PIS,
  editor = "{IEEE}",
  booktitle = "{Proceedings of the 2nd International Symposium on High Performance Distributed Computing, July 20--23, 1993, Spokane, Washington, Cavanaugh's Inn at the Park}",
  title = "{Proceedings of the 2nd International Symposium on High Performance Distributed Computing, July 20--23, 1993, Spokane, Washington, Cavanaugh's Inn at the Park}",
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = "xiv + 353",
  year = "1993",
  ISBN = "0-8186-3900-8, 0-8186-3901-6",
  ISBN-13 = "978-0-8186-3900-5, 978-0-8186-3901-2",
  LCCN = "QA76.9.D5I593 1993",
  bibdate = "Thu Feb 29 17:59:11 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "IEEE catalog no. 93TH0550-4.",
  series = "Proceedings of the International Symposium on High Performance Distributed Computing 2nd",
  acknowledgement = ack-nhfb,
  sponsor = "IEEE Computer Society. Syracuse University; Northeast Parallel Architectures Center. Washington State University.",
}

@Proceedings{IEEE:1993:PSI,
  editor = "{IEEE}",
  booktitle = "{Proceedings / Seventh International Parallel Processing Symposium, April 13--16, 1993, Newport Beach, California}",
  title = "{Proceedings / Seventh International Parallel Processing Symposium, April 13--16, 1993, Newport Beach, California}",
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = "xx + 858",
  year = "1993",
  ISBN = "0-8186-3442-1",
  ISBN-13 = "978-0-8186-3442-0",
  LCCN = "QA 76.58 I56 1993",
  bibdate = "Sun Dec 22 10:18:08 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "IEEE catalog no. 93TH0513-2.",
  acknowledgement = ack-nhfb,
  confsponsor = "IEEE Comput.
Soc.; ACM Sigarch",
}

@Proceedings{IEEE:1993:PSP,
  editor = "{IEEE}",
  key = "Supercomputing'93",
  booktitle = "{Proceedings, Supercomputing '93: Portland, Oregon, November 15--19, 1993}",
  title = "{Proceedings, Supercomputing '93: Portland, Oregon, November 15--19, 1993}",
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = "xxii + 935",
  year = "1993",
  ISBN = "0-8186-4340-4 (paperback), 0-8186-4341-2 (microfiche), 0-8186-4342-0 (hardback), 0-8186-4346-3 (CD-ROM)",
  ISBN-13 = "978-0-8186-4340-8 (paperback), 978-0-8186-4341-5 (microfiche), 978-0-8186-4342-2 (hardback), 978-0-8186-4346-0 (CD-ROM)",
  ISSN = "1063-9535",
  LCCN = "QA76.5 .S96 1993",
  bibdate = "Mon Jan 15 11:06:21 1996",
  bibsource = "https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "631.1; 722.1; 722.3; 722.4; 723.2; 921.6",
  keywords = "Algorithms; Cache coherence; Clustered workstations; Computer graphics; Computer networks; Computer programming languages; Data parallel compilers; Data partitioning; Distributed computer systems; Eigenvalues and eigenfunctions; Finite element method; Flow visualization; Fluid mechanics; Linear algebra; Mass storage; Massively parallel processors; Natural sciences computing; Parallel languages; Parallel processing systems; Parallel rendering; Program compilers; Quantum theory; Scheduling; Sparse matrices; Supercomputers",
  sponsor = "Institute of Electrical and Electronics Engineers; Computer Society. Association for Computing Machinery; SIGARCH.",
}

@Proceedings{IEEE:1993:WHP,
  editor = "{IEEE}",
  key = "WHP'92",
  booktitle = "{Workshop on Heterogeneous Processing (1992: Beverly Hills, Calif.) Proceedings / Workshop on Heterogeneous Processing, March 23, 1992, Beverly Hills, California}",
  title = "{Workshop on Heterogeneous Processing (1992: Beverly Hills, Calif.)
Proceedings / Workshop on Heterogeneous Processing, March 23, 1992, Beverly Hills, California}",
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = "ix + 101",
  year = "1993",
  ISBN = "0-8186-2702-6",
  ISBN-13 = "978-0-8186-2702-6",
  LCCN = "QA76.58 .W654 1992",
  bibdate = "Tue Jan 16 07:27:01 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}

@Proceedings{Kowalik:1993:SPC,
  editor = "Janusz S. Kowalik and Lucio Grandinetti",
  booktitle = "{Software for parallel computation: Proceedings of the NATO Advanced Workshop on Software for Parallel Computation, held at Cetraro, Cosenza, Italy, June 22--26, 1992}",
  title = "{Software for parallel computation: Proceedings of the NATO Advanced Workshop on Software for Parallel Computation, held at Cetraro, Cosenza, Italy, June 22--26, 1992}",
  volume = "106",
  publisher = pub-SV,
  address = pub-SV:adr,
  pages = "ix + 363",
  year = "1993",
  ISBN = "3-540-56451-9 (Berlin), 0-387-56451-9 (New York)",
  ISBN-13 = "978-3-540-56451-5 (Berlin), 978-0-387-56451-7 (New York)",
  LCCN = "QA76.58 .S629 1993",
  bibdate = "Sat Feb 24 09:43:28 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  series = "NATO ASI series. Series F, Computer and systems sciences",
  acknowledgement = ack-nhfb,
}

@Proceedings{Law:1993:EDM,
  editor = "K. H. Law and R. E.
Fulton and others",
  booktitle = "{Engineering data management: key to success in a global market: proceedings of the 1993 ASME International Computers in Engineering Conference and Exposition, August 8--12, San Diego, California}",
  title = "{Engineering data management: key to success in a global market: proceedings of the 1993 ASME International Computers in Engineering Conference and Exposition, August 8--12, San Diego, California}",
  publisher = pub-ASME,
  address = pub-ASME:adr,
  pages = "vi + 273",
  year = "1993",
  ISBN = "0-7918-1169-7",
  ISBN-13 = "978-0-7918-1169-6",
  LCCN = "TA345.A86 1993",
  bibdate = "Thu Feb 29 17:59:11 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  series = "COMPUTERS IN ENGINEERING VOL COM",
  acknowledgement = ack-nhfb,
  sponsor = "ASME; Computers in Engineering Division.",
}

@Proceedings{Mudge:1993:PTS,
  editor = "T. N. Mudge and V. Milutinovic and L. Hunter",
  booktitle = "{Proceedings of the Twenty-Sixth Hawaii International Conference on System Sciences (HICSS-26), held in Wailea, Hawaii in January 5--8, 1993}",
  title = "{Proceedings of the Twenty-Sixth Hawaii International Conference on System Sciences (HICSS-26), held in Wailea, Hawaii in January 5--8, 1993}",
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = "xvi + 895 (vol. 1), xiv + 691 (vol. 2), xii + 654 (vol. 3), xv + 889 (vol. 4)",
  year = "1993",
  ISBN = "0-8186-3230-5",
  ISBN-13 = "978-0-8186-3230-3",
  LCCN = "????",
  bibdate = "Wed Apr 16 11:35:41 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "Four volumes.
IEEE catalog number 93TH0501-7.",
  acknowledgement = ack-nhfb,
}

@Proceedings{Schill:1993:DOD,
  editor = "Alexander Schill",
  booktitle = "{DCE} --- the {OSF} distributed computing environment: client\slash server model and beyond: {International DCE Workshop, Karlsruhe, Germany, October 7--8, 1993: proceedings}",
  title = "{DCE} --- the {OSF} distributed computing environment: client\slash server model and beyond: {International DCE Workshop, Karlsruhe, Germany, October 7--8, 1993: proceedings}",
  number = "731",
  publisher = pub-SV,
  address = pub-SV:adr,
  pages = "283",
  year = "1993",
  ISBN = "3-540-57306-2, 0-387-57306-2",
  ISBN-13 = "978-3-540-57306-7, 978-0-387-57306-9",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  LCCN = "QA76.9.C55I58 1993",
  bibdate = "Thu Feb 29 17:59:11 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  series = ser-LNCS,
  acknowledgement = ack-nhfb,
  sponsor = "German Association of Computer Science.",
}

@Proceedings{Sincovec:1993:SCP,
  editor = "Richard F.
Sincovec",
  booktitle = "{SIAM Conference on Parallel Processing for Scientific Computing (6th: 1993: Norfolk, VA, USA)}",
  title = "{SIAM Conference on Parallel Processing for Scientific Computing (6th: 1993: Norfolk, VA, USA)}",
  publisher = pub-SIAM,
  address = pub-SIAM:adr,
  pages = "xix + 1041 + iv",
  year = "1993",
  ISBN = "0-89871-315-3",
  ISBN-13 = "978-0-89871-315-2",
  LCCN = "QA 76.58 S55 1993",
  bibdate = "Wed Aug 14 10:36:11 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "Two volumes.",
  acknowledgement = ack-nhfb,
  sponsor = "Society for Industrial and Applied Mathematics.",
}

@Proceedings{Volkert:1993:PCS,
  editor = "Jens Volkert",
  booktitle = "{Parallel computation: Second International ACPC Conference, Gmunden, Austria, October 4--6, 1993: proceedings}",
  title = "{Parallel computation: Second International ACPC Conference, Gmunden, Austria, October 4--6, 1993: proceedings}",
  volume = "734",
  publisher = pub-SV,
  address = pub-SV:adr,
  pages = "viii + 248",
  year = "1993",
  ISBN = "3-540-57314-3 (Berlin), 0-387-57314-3 (New York)",
  ISBN-13 = "978-3-540-57314-2 (Berlin), 978-0-387-57314-4 (New York)",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  LCCN = "QA267.A1 L43 no.734",
  bibdate = "Wed Apr 16 11:41:47 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  price = "DM58.00",
  series = ser-LNCS,
  acknowledgement = ack-nhfb,
  keywords = "parallel processing (electronic computers) -- congresses",
  sponsor = "Austrian Center for Parallel Computation.",
}

@Proceedings{Yelon:1993:PTS,
  editor = "W. B.
Yelon and others",
  booktitle = "{Proceedings of the Thirty-seventh Annual Conference on Magnetism and Magnetic Materials: December 1--4, 1992, Houston, Texas}",
  title = "{Proceedings of the Thirty-seventh Annual Conference on Magnetism and Magnetic Materials: December 1--4, 1992, Houston, Texas}",
  volume = "73(10)",
  publisher = pub-AIP,
  address = pub-AIP:adr,
  pages = "5309--7023",
  month = may,
  year = "1993",
  CODEN = "JAPIAU",
  ISBN = "1-56396-212-8",
  ISBN-13 = "978-1-56396-212-7",
  ISSN = "0021-8979 (print), 1089-7550 (electronic), 1520-8850",
  LCCN = "QC753 .C748 1990",
  bibdate = "Sun Dec 22 10:17:40 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "Two volumes.",
  series = j-J-APPL-PHYS,
  acknowledgement = ack-nhfb,
  confsponsor = "AIP; IEEE",
}

@Proceedings{ACM:1994:CPI,
  editor = "{ACM}",
  booktitle = "{Conference Proceedings. 1994 International Conference on Supercomputing}",
  title = "{Conference Proceedings. 1994 International Conference on Supercomputing}",
  publisher = pub-ACM,
  address = pub-ACM:adr,
  pages = "xii + 439",
  year = "1994",
  ISBN = "0-89791-665-4",
  ISBN-13 = "978-0-89791-665-3",
  LCCN = "????",
  bibdate = "Sun Dec 22 10:20:45 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://www.acm.org/pubs/contents/proceedings/supercomputing/181181/",
  acknowledgement = ack-nhfb,
  confdate = "11--15 July 1994",
  conflocation = "Manchester, UK",
  confsponsor = "ACM",
}

@Proceedings{Agrawal:1994:PIC,
  editor = "Dharma P. Agrawal and K. C. (Kuo Chung) Tai and Jagdish Chandra",
  booktitle = "{Proceedings of the 1994 International Conference on Parallel Processing, August 15--19, 1994. Vol 3: Algorithms and applications}",
  title = "{Proceedings of the 1994 International Conference on Parallel Processing, August 15--19, 1994. Vol 3: Algorithms and applications}",
  publisher = pub-CRC,
  address = pub-CRC:adr,
  pages = "xvii + 301 (vol. 1), xviii + 323 (vol. 2), 297 (vol.
3)",
  year = "1994",
  ISBN = "0-8493-2496-3, 0-8493-2495-5",
  ISBN-13 = "978-0-8493-2496-3, 978-0-8493-2495-6",
  ISSN = "0190-3918",
  LCCN = "QA 76.58 I55 1994",
  bibdate = "Wed Aug 14 10:37:00 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "Three volumes.",
  acknowledgement = ack-nhfb,
}

@Proceedings{Anonymous:1994:FWR,
  editor = "Anonymous",
  booktitle = "{Forschung und wissenschaftliches Rechnen: Beitr{\"a}ge anl{\"a}sslich des 10. EDV-Benutzertreffens der Max-Planck-Gesellschaft in G{\"o}ttingen, November 1993}",
  title = "{Forschung und wissenschaftliches Rechnen: Beitr{\"a}ge anl{\"a}sslich des 10. EDV-Benutzertreffens der Max-Planck-Gesellschaft in G{\"o}ttingen, November 1993}",
  number = "1",
  publisher = "Max-Planck-Gesellschaft",
  address = "M{\"u}nchen, Germany",
  pages = "270",
  year = "1994",
  ISBN = "????",
  ISBN-13 = "????",
  ISSN = "0341-7778",
  LCCN = "Q180.55.E4 M39 1993",
  bibdate = "Thu Feb 29 17:59:11 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  series = "Berichte und Mitteilungen --- Max Planck Gesellschaft",
  acknowledgement = ack-nhfb,
  sponsor = "Max-Planck-Gesellschaft.",
}

@Proceedings{Anonymous:1994:ICS,
  editor = "Anonymous",
  booktitle = "{1994 International Computer Symposium Conference Proceedings}",
  title = "{1994 International Computer Symposium Conference Proceedings}",
  publisher = "Nat. Chiao Tung Univ",
  address = "Hsinchu, Taiwan",
  pages = "xvi + 1310",
  year = "1994",
  ISBN = "????",
  ISBN-13 = "????",
  LCCN = "????",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "2 vol.",
  acknowledgement = ack-nhfb,
  confdate = "12--15 Dec. 1994",
  conflocation = "Hsinchu, Taiwan",
  confsponsor = "Ministr. Educ.; Comput.
Soc",
  pubcountry = "Taiwan",
}

@Proceedings{Anonymous:1994:PDC,
  editor = "Anonymous",
  booktitle = "{Parallel and distributed computing systems: proceedings of the ISCA International Conference, Las Vegas, Nevada, U.S.A., October 6--8, 1994}",
  title = "{Parallel and distributed computing systems: proceedings of the ISCA International Conference, Las Vegas, Nevada, U.S.A., October 6--8, 1994}",
  publisher = "ISCA",
  address = "Raleigh, NC, USA",
  pages = "x + 870",
  year = "1994",
  ISBN = "1-880843-09-9",
  ISBN-13 = "978-1-880843-09-3",
  LCCN = "QA76.58.I543 1994",
  bibdate = "Fri Feb 01 06:55:36 2002",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}

@Proceedings{Anonymous:1994:PPC,
  editor = "Anonymous",
  booktitle = "{Parallel processing comes of age: real applications from industry and commerce: Seminar --- June 1994, London}",
  title = "{Parallel processing comes of age: real applications from industry and commerce: Seminar --- June 1994, London}",
  publisher = "Unicom Seminars",
  address = "????",
  pages = "????",
  year = "1994",
  ISBN = "????",
  ISBN-13 = "????",
  LCCN = "????",
  bibdate = "Thu Feb 29 17:59:11 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  sponsor = "Unicom.",
}

@Proceedings{Anonymous:1994:PSE,
  editor = "Anonymous",
  booktitle = "{Proceedings. SHARE Europe Spring Conference}",
  title = "{Proceedings.
SHARE Europe Spring Conference}",
  publisher = "SHARE Europe (SEAS)",
  address = "Carouge/Geneva, Switzerland",
  pages = "xix + 810",
  year = "1994",
  ISBN = "????",
  ISBN-13 = "????",
  LCCN = "????",
  bibdate = "Sun Dec 22 10:20:45 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  confdate = "18--21 April 1994",
  conflocation = "Brussels, Belgium",
  pubcountry = "Switzerland",
}

@Proceedings{Anonymous:1994:SCC,
  editor = "Anonymous",
  booktitle = "{Small college computing: 27th Annual symposium --- April 1994, Winona, MN}",
  title = "{Small college computing: 27th Annual symposium --- April 1994, Winona, MN}",
  publisher = "SCCS",
  address = "????",
  pages = "????",
  year = "1994",
  ISBN = "????",
  ISBN-13 = "????",
  LCCN = "????",
  bibdate = "Thu Feb 29 17:59:11 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  series = "SCCS --- Proceedings --- 27th",
  acknowledgement = ack-nhfb,
}

@Proceedings{Anonymous:1994:SQC,
  editor = "Anonymous",
  booktitle = "{Software quality concern for people: proceedings of the fourth European Conference on Software Quality, October 17--20, 1994, Basel, Switzerland}",
  title = "{Software quality concern for people: proceedings of the fourth European Conference on Software Quality, October 17--20, 1994, Basel, Switzerland}",
  publisher = "vdf Verlag der Fachvereine",
  address = "Zurich, Switzerland",
  pages = "538",
  year = "1994",
  ISBN = "3-7281-2153-3",
  ISBN-13 = "978-3-7281-2153-0",
  LCCN = "????",
  bibdate = "Wed Apr 16 11:49:47 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}

@Proceedings{Arnold:1994:PCT,
  editor = "D. Arnold and R. Christie and J. Day and P. Roe",
  booktitle = "{Parallel Computing and Transputers. PCAT-93. Proceedings of the 6th Australian Transputer and Occam User Group Conference, November 3--4, 1993, Brisbane, Queensland, Australia}",
  title = "{Parallel Computing and Transputers. PCAT-93.
Proceedings of the 6th Australian Transputer and Occam User Group Conference, November 3--4, 1993, Brisbane, Queensland, Australia}",
  volume = "37",
  publisher = pub-IOS,
  address = pub-IOS:adr,
  pages = "383",
  year = "1994",
  ISBN = "90-5199-149-5",
  ISBN-13 = "978-90-5199-149-9",
  LCCN = "????",
  bibdate = "Sun Dec 22 10:18:08 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  series = "Transputer and Occam Engineering Series",
  acknowledgement = ack-nhfb,
  pubcountry = "Netherlands",
}

@Proceedings{Becks:1994:NCT,
  editor = "K.-H. Becks and D. Perret-Gallix",
  booktitle = "{New computing techniques in physics research III: proceedings of the Third International Workshop on Software Engineering, Artificial Intelligence and Expert Systems for High Energy and Nuclear Physics: October 4--8, 1993, Oberammergau, Germany}",
  title = "{New computing techniques in physics research III: proceedings of the Third International Workshop on Software Engineering, Artificial Intelligence and Expert Systems for High Energy and Nuclear Physics: October 4--8, 1993, Oberammergau, Germany}",
  publisher = pub-WORLD-SCI,
  address = pub-WORLD-SCI:adr,
  pages = "xvii + 664",
  year = "1994",
  ISBN = "981-02-1699-8",
  ISBN-13 = "978-981-02-1699-3",
  LCCN = "QC793.47.E4I58 1993",
  bibdate = "Sun Dec 22 10:18:08 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  pubcountry = "Singapore",
}

@Proceedings{Bolding:1994:PCR,
  editor = "Kevin Bolding and Lawrence Snyder",
  booktitle = "{Parallel computer routing and communication: first international workshop, PCRCW '94, Seattle, Washington, USA, May 16--18, 1994: proceedings}",
  title = "{Parallel computer routing and communication: first international workshop, PCRCW '94, Seattle, Washington, USA, May 16--18, 1994: proceedings}",
  number = "853",
  publisher = pub-SV,
  address = pub-SV:adr,
  pages = "ix + 317",
  year = "1994",
  ISBN = "3-540-58429-3",
  ISBN-13 = "978-3-540-58429-2",
  ISSN = "0302-9743
(print), 1611-3349 (electronic)",
  LCCN = "QA76.58.P39 1994",
  bibdate = "Sat Apr 19 16:34:54 MDT 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  series = ser-LNCS,
  acknowledgement = ack-nhfb,
  conflocation = "Seattle, WA, USA; 16--18 May 1994",
  corpsource = "Dept. of Comput. Sci. and Eng., Washington Univ., Seattle, WA, USA",
  pubcountry = "Germany",
  treatment = "P Practical",
}

@Proceedings{Calmet:1994:RWC,
  editor = "J. Calmet",
  booktitle = "{Rhine workshop on computer algebra --- March 22--24, 1994, Karlsruhe, Germany}",
  title = "{Rhine workshop on computer algebra --- March 22--24, 1994, Karlsruhe, Germany}",
  publisher = "Universit{\"a}t Karlsruhe",
  address = "Karlsruhe, Germany",
  pages = "v + 224",
  year = "1994",
  ISBN = "????",
  ISBN-13 = "????",
  LCCN = "????",
  bibdate = "Thu Feb 29 17:59:11 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  sponsor = "University of Karlsruhe. Faculty of Informatics. Institute of Algorithms and Cognitive Systems.",
}

@Proceedings{Davidor:1994:PPS,
  editor = "Yuval Davidor and Hans-Paul Schwefel and Reinhard M{\"a}nner",
  booktitle = "{Parallel problem solving from nature --- PPSN III: International Conference on Evolutionary Computation, the Third Conference on Parallel Problem Solving from Nature, Jerusalem, Israel, October 9--14, 1994: proceedings}",
  title = "{Parallel problem solving from nature --- PPSN III: International Conference on Evolutionary Computation, the Third Conference on Parallel Problem Solving from Nature, Jerusalem, Israel, October 9--14, 1994: proceedings}",
  number = "866",
  publisher = pub-SV,
  address = pub-SV:adr,
  pages = "xv + 642",
  year = "1994",
  ISBN = "3-540-58484-6",
  ISBN-13 = "978-3-540-58484-1",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  LCCN = "QA76.58 .I535 1994",
  bibdate = "Thu Feb 29 17:59:11 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  series = ser-LNCS,
  acknowledgement = ack-nhfb,
}
@Proceedings{Decker:1994:PEM,
  editor = "K. M. (Karsten M.) Decker and R. M. (Rene M.) Rehmann",
  booktitle = "{Programming environments for massively parallel distributed systems: working conference of the IFIP WG10.3, April 25--29, 1994, Ascona, Switzerland}",
  title = "{Programming environments for massively parallel distributed systems: working conference of the IFIP WG10.3, April 25--29, 1994, Ascona, Switzerland}",
  publisher = pub-BIRKHAUSER,
  address = pub-BIRKHAUSER:adr,
  pages = "xiv + 420",
  year = "1994",
  ISBN = "0-8176-5090-3 (Boston), 3-7643-5090-3 (Basel)",
  ISBN-13 = "978-0-8176-5090-2 (Boston), 978-3-7643-5090-1 (Basel)",
  LCCN = "QA76.58.P767 1994",
  bibdate = "Thu Feb 29 17:59:11 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  sponsor = "IFIP WG10.3.",
}

@Proceedings{deGloria:1994:TAS,
  editor = "A. de Gloria and M. R. Jane and D. Marini",
  booktitle = "{Transputer Applications and Systems '94. Proceedings of the 1994 World Transputer Congress}",
  title = "{Transputer Applications and Systems '94. Proceedings of the 1994 World Transputer Congress}",
  publisher = pub-IOS,
  address = pub-IOS:adr,
  pages = "xi + 1009",
  year = "1994",
  ISBN = "????",
  ISBN-13 = "????",
  LCCN = "????",
  bibdate = "Sun Dec 22 10:20:45 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  confdate = "5--7 Sept. 1994",
  conflocation = "Como, Italy",
  confsponsor = "Transputer Consortium; SGS-Thomson Microelectron.; Eur. Union; Italian Transputer User Group",
  pubcountry = "Netherlands",
}

@Proceedings{Dekker:1994:MPP,
  editor = "L. (Leendert) Dekker and W. Smit and J. C.
Zuidervaart",
  booktitle = "{Massively parallel processing applications and development: proceedings of the 1994 EUROSIM Conference on Massively Parallel Processing Applications and Development, Delft, The Netherlands, 21--23 June 1994}",
  title = "{Massively parallel processing applications and development: proceedings of the 1994 EUROSIM Conference on Massively Parallel Processing Applications and Development, Delft, The Netherlands, 21--23 June 1994}",
  publisher = pub-ELS,
  address = pub-ELS:adr,
  pages = "xxii + 973",
  year = "1994",
  ISBN = "0-444-81784-0",
  ISBN-13 = "978-0-444-81784-6",
  LCCN = "QA76.58.E98 1994",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  confsponsor = "AKZO NOBEL; BSO; Convex Comput.; HPCN projects; IBM; NOWESP; et al",
  pubcountry = "Netherlands",
}

@Proceedings{Dongarra:1994:PSC,
  editor = "Jack Dongarra and Jerzy Wa{\'s}niewski",
  booktitle = "{Parallel scientific computing: First International Workshop, PARA '94, Lyngby, Denmark, June 20--23, 1994: proceedings}",
  title = "{Parallel scientific computing: First International Workshop, PARA '94, Lyngby, Denmark, June 20--23, 1994: proceedings}",
  volume = "879",
  publisher = pub-SV,
  address = pub-SV:adr,
  pages = "xi + 566",
  year = "1994",
  ISBN = "3-540-58712-8 (Berlin), 0-387-58712-8 (New York)",
  ISBN-13 = "978-3-540-58712-5 (Berlin), 978-0-387-58712-7 (New York)",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  LCCN = "QA76.58 .P35 1994",
  bibdate = "Sun Dec 22 10:20:45 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  price = "DM104.00",
  series = ser-LNCS,
  acknowledgement = ack-nhfb,
  confsponsor = "Danish Comput. Centre for Res. and Educ.; Inst. Math. Modelling; Tech. Univ. Denmark",
  pubcountry = "Germany",
  sponsor = "Danish Computing Centre for Research and Education. Technical University of Denmark; Institute for Mathematical Modelling.",
}

@Proceedings{Dongarra:1994:PSW,
  editor = "Jack J.
Dongarra and Bernard Tourancheau",
  booktitle = "{Proceedings of the Second Workshop on Environments and Tools for Parallel Scientific Computing: Townsend, TN, USA, 25--27 May 1994}",
  title = "{Proceedings of the Second Workshop on Environments and Tools for Parallel Scientific Computing: Townsend, TN, USA, 25--27 May 1994}",
  publisher = pub-SIAM,
  address = pub-SIAM:adr,
  pages = "x + 292",
  year = "1994",
  ISBN = "0-89871-343-9",
  ISBN-13 = "978-0-89871-343-5",
  LCCN = "QA76.58.I568 1994",
  bibdate = "Sat Apr 19 16:34:54 MDT 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  conflocation = "Townsend, TN, USA; 25--27 May 1994",
  conftitle = "Proceedings of the Second Workshop on Environments and Tools for Parallel Scientific Computing",
  corpsource = "IBM Thomas J. Watson Res. Center, Yorktown Heights, NY, USA",
  treatment = "P Practical",
}

@Proceedings{Gentzsch:1994:HPC,
  editor = "Wolfgang Gentzsch and Uwe Harms",
  booktitle = "{High-performance computing and networking: international conference and exhibition, Munich, Germany, April 18--20, 1994: proceedings}",
  title = "{High-performance computing and networking: international conference and exhibition, Munich, Germany, April 18--20, 1994: proceedings}",
  volume = "797",
  publisher = pub-SV,
  address = pub-SV:adr,
  pages = "xxii + 519",
  year = "1994",
  ISBN = "0-387-57981-8 (New York), 3-540-57981-8 (Berlin)",
  ISBN-13 = "978-0-387-57981-8 (New York), 978-3-540-57981-6 (Berlin)",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  LCCN = "QA76.88.I57 1994",
  bibdate = "Sat Apr 19 16:34:54 MDT 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "Two volumes.",
  price = "DM96.00",
  series = ser-LNCS,
  acknowledgement = ack-nhfb,
  conftitle = "High-Performance Computing and Networking International Conference. Proceedings, Volume II: Networking and Tools",
  corpsource = "German Nat. Res. Center for Comput. Sci., St.
Augustin, Germany",
  pubcountry = "Germany",
  treatment = "P Practical",
}

@Proceedings{Gruber:1994:PJE,
  editor = "Ralf Gruber and Marco Tomassini",
  booktitle = "{Proceedings of the 6th Joint EPS-APS International Conference on Physics Computing: Physics Computing '94, Palazzo dei Congressi, Lugano, Switzerland, 22--26 August 1994}",
  title = "{Proceedings of the 6th Joint EPS-APS International Conference on Physics Computing: Physics Computing '94, Palazzo dei Congressi, Lugano, Switzerland, 22--26 August 1994}",
  publisher = "European Physical Society",
  address = "Geneva, Switzerland",
  pages = "xvii + 730",
  year = "1994",
  ISBN = "2-88270-011-3",
  ISBN-13 = "978-2-88270-011-7",
  LCCN = "QC20.7.E4I58 1994",
  bibdate = "Sun Dec 22 10:20:45 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  pubcountry = "Switzerland",
}

%%% NOTE: the citation key below deliberately keeps its historical
%%% Hesham prefix so that any existing citation or crossref of the key
%%% keeps resolving; only the editor field carries the corrected name
%%% (given name Hesham, family name El-Rewini).
@Proceedings{Hesham:1994:PTS,
  editor = "Hesham El-Rewini and B. D. Shriver",
  booktitle = "{Proceedings of the Twenty-Seventh Hawaii International Conference on System Sciences. Vol. II: Software Technology, January 4--7, 1994, Wailea, HI, USA}",
  title = "{Proceedings of the Twenty-Seventh Hawaii International Conference on System Sciences. Vol. II: Software Technology, January 4--7, 1994, Wailea, HI, USA}",
  volume = "27",
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = "xv + 681",
  year = "1994",
  ISBN = "0-8186-5060-5",
  ISBN-13 = "978-0-8186-5060-4",
  ISSN = "1060-3425",
  LCCN = "????",
  bibdate = "Sun Dec 22 10:18:08 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "IEEE catalog no. 94TH0607-2.",
  acknowledgement = ack-nhfb,
  confsponsor = "IEEE; ACM; Univ. Hawaii; Univ. Hawaii Coll. Bus. Admin",
}

@Proceedings{Horiguchi:1994:ISP,
  editor = "S. Horiguchi and D. Frank Hsu and M.
Kimura",
  booktitle = "{International Symposium on Parallel Architectures, Algorithms, and Networks (ISPAN): proceedings of the 1994, December 14--16, 1994, Kanazawa, Japan}",
  title = "{International Symposium on Parallel Architectures, Algorithms, and Networks (ISPAN): proceedings of the 1994, December 14--16, 1994, Kanazawa, Japan}",
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = "xi + 452",
  year = "1994",
  ISBN = "0-8186-6507-6 (case), 0-8186-6506-8 (microfiche)",
  ISBN-13 = "978-0-8186-6507-3 (case), 978-0-8186-6506-6 (microfiche)",
  LCCN = "QA76.58 .I5673 1994 Bar",
  bibdate = "Wed Apr 16 07:34:31 MDT 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "IEEE catalog number 94TH0697-3.",
  acknowledgement = ack-nhfb,
  keywords = "parallel processing (electronic computers) -- congresses",
}

@Proceedings{IEEE:1994:FSF,
  editor = "{IEEE}",
  booktitle = "{Frontiers'95, the 5th Symposium on the Frontiers of Massively Parallel Computation: proceedings, February 6--9, 1995, McLean, Virginia}",
  title = "{Frontiers'95, the 5th Symposium on the Frontiers of Massively Parallel Computation: proceedings, February 6--9, 1995, McLean, Virginia}",
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = "xvi + 539",
  year = "1994",
  ISBN = "0-8186-6965-9",
  ISBN-13 = "978-0-8186-6965-1",
  LCCN = "QA76.58.S95 1994",
  bibdate = "Sun Dec 22 10:20:45 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "IEEE catalog no. 95TH8024.",
  acknowledgement = ack-nhfb,
  confsponsor = "IEEE Comput. Soc. Tech. Committee on Comput. Archit.; NASA; Univ. Maryland Inst. Adv. Comput. Studies; George Mason Univ",
}

@Proceedings{IEEE:1994:IPN,
  editor = "{IEEE}",
  booktitle = "{ICIP '94: proceedings, November 13--16, 1994, Austin Convention Center, Austin, Texas}",
  title = "{ICIP '94: proceedings, November 13--16, 1994, Austin Convention Center, Austin, Texas}",
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = "liii + 992 (vol. 1), 1064 (vol. 2), 1050 (vol.
3)",
  year = "1994",
  ISBN = "0-8186-6952-7 (casebound), 0-8186-6950-0 (paperback), 0-8186-6951-9 (microfiche)",
  ISBN-13 = "978-0-8186-6952-1 (casebound), 978-0-8186-6950-7 (paperback), 978-0-8186-6951-4 (microfiche)",
  LCCN = "TA1637.I25 1994",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "Three volumes. IEEE catalog no. 94CH35708.",
  acknowledgement = ack-nhfb,
  confsponsor = "IEEE Signal Process. Soc",
}

%%% The conference dates (13--16 September 1994) are stated once in
%%% booktitle and title.
@Proceedings{IEEE:1994:OOE,
  editor = "{IEEE}",
  booktitle = "{Oceans 94: Oceans engineering for today's technology and tomorrow's preservation: proceedings, 13--16 September 1994, Brest, France}",
  title = "{Oceans 94: Oceans engineering for today's technology and tomorrow's preservation: proceedings, 13--16 September 1994, Brest, France}",
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = "xl + 905 (vol. 1), xl + 727 (vol. 2), xl + 630 (vol. 3)",
  year = "1994",
  ISBN = "0-7803-2057-3, 0-7803-2056-5, 0-7803-2058-1",
  ISBN-13 = "978-0-7803-2057-4, 978-0-7803-2056-7, 978-0-7803-2058-1",
  ISSN = "0197-7385",
  LCCN = "TC 1505 O33197 1994",
  bibdate = "Thu Feb 29 17:59:11 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "Three volumes. IEEE catalog no.
94CH3472-8.",
  series = "Oceans",
  acknowledgement = ack-nhfb,
  sponsor = "IEEE; Ocean Engineering Society.",
}

@Proceedings{IEEE:1994:PIF,
  editor = "{IEEE}",
  booktitle = "{Proceedings of the 1994 IEEE Frequency Control Symposium (the 48th annual symposium), 1--3 June 1994, Westin Hotel-Copley Place, Boston, Massachusetts, USA}",
  title = "{Proceedings of the 1994 IEEE Frequency Control Symposium (the 48th annual symposium), 1--3 June 1994, Westin Hotel-Copley Place, Boston, Massachusetts, USA}",
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = "xvii + 817",
  year = "1994",
  ISBN = "0-7803-1945-1",
  ISBN-13 = "978-0-7803-1945-5",
  LCCN = "TK 7872 O7 I34 1994",
  bibdate = "Sun Dec 22 10:20:45 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "IEEE catalog no. 94CH3446-2.",
  acknowledgement = ack-nhfb,
  confsponsor = "IEEE Ultrasonics, Ferroelectr. and Frequency Control Soc",
  numericalindex = "Frequency 1.0E+09 to 2.0E+09 Hz",
}

@Proceedings{IEEE:1994:PSI,
  editor = "{IEEE}",
  booktitle = "{Proceedings / Second International Workshop on Configurable Distributed Systems, March 21--23, 1994, Carnegie Mellon University, Pittsburgh, Pennsylvania}",
  title = "{Proceedings / Second International Workshop on Configurable Distributed Systems, March 21--23, 1994, Carnegie Mellon University, Pittsburgh, Pennsylvania}",
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = "ix + 215",
  year = "1994",
  ISBN = "0-8186-5390-6",
  ISBN-13 = "978-0-8186-5390-2",
  LCCN = "QA76.9.D5I595 1994",
  bibdate = "Sun Dec 22 10:18:08 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "IEEE catalog no.
94TH0651-0.",
  acknowledgement = ack-nhfb,
  confsponsor = "IEEE; Carnegie Mellon Univ",
}

@Proceedings{IEEE:1994:PSP,
  editor = "{IEEE}",
  booktitle = "{Proceedings of the Scalable Parallel Libraries Conference, October 6--8, 1993, Mississippi State, Mississippi}",
  title = "{Proceedings of the Scalable Parallel Libraries Conference, October 6--8, 1993, Mississippi State, Mississippi}",
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = "vii + 261",
  year = "1994",
  ISBN = "0-8186-4980-1",
  ISBN-13 = "978-0-8186-4980-6",
  LCCN = "QA76.58.S34 1993",
  bibdate = "Sat Apr 19 16:34:54 MDT 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  conflocation = "Mississippi State, MS, USA; 6-8 Oct. 1993",
  confsponsor = "Mississippi State Univ.; Nat. Sci. Found",
  conftitle = "Proceedings of Scalable Parallel Libraries Conference",
  corpsource = "Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL, USA",
  sponsororg = "Mississippi State Univ.; Nat. Sci. Found",
  treatment = "P Practical",
}

@Proceedings{IEEE:1994:PSW,
  editor = "{IEEE}",
  booktitle = "{Proceedings, Supercomputing '94: Washington, DC, November 14--18, 1994}",
  title = "{Proceedings, Supercomputing '94: Washington, DC, November 14--18, 1994}",
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = "xvii + 823",
  year = "1994",
  ISBN = "0-8186-6607-2, 0-8186-6605-6, 0-8186-6606-4",
  ISBN-13 = "978-0-8186-6607-0, 978-0-8186-6605-6, 978-0-8186-6606-3",
  ISSN = "1063-9535",
  LCCN = "QA76.5 .S894 1994",
  bibdate = "Mon Aug 26 10:38:41 MDT 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "IEEE catalog number 94CH34819.",
  series = "Supercomputing",
  acknowledgement = ack-nhfb,
  sponsor = "IEEE.",
}

@Proceedings{IEEE:1994:PTI,
  editor = "{IEEE}",
  booktitle = "{Proceedings of the Third IEEE International Symposium on High Performance Distributed Computing, August 2--5, 1994, San Francisco, California}",
  title = "{Proceedings of the Third IEEE International Symposium on High
Performance Distributed Computing, August 2--5, 1994, San Francisco, California}",
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = "xiii + 304",
  year = "1994",
  ISBN = "0-8186-6395-2",
  ISBN-13 = "978-0-8186-6395-6",
  LCCN = "QA76.9.D5I328 1994",
  bibdate = "Sun Dec 22 10:18:08 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "IEEE catalog no. 94TH0667-6.",
  acknowledgement = ack-nhfb,
  confsponsor = "IEEE Comput. Soc. Tech. Committee on Distributed Process.; Northeast Parallel Archit. Center (NPAC) at Syracuse Univ.; ACM SIGCOMM",
}

%%% The series name is capitalised as in Dongarra:1995:HPC (same series).
@Proceedings{Joubert:1994:PCT,
  editor = "G. R. Joubert and F. J. Peters and D. Trystram and D. J. Evans",
  booktitle = "{Parallel computing: trends and applications: proceedings of the international conference ParCo93, Grenoble, France, 7--10 September 1993}",
  title = "{Parallel computing: trends and applications: proceedings of the international conference ParCo93, Grenoble, France, 7--10 September 1993}",
  volume = "9",
  publisher = pub-NH,
  address = pub-NH:adr,
  pages = "xvi + 728",
  year = "1994",
  ISBN = "0-444-81841-3",
  ISBN-13 = "978-0-444-81841-6",
  LCCN = "QA76.58 .P3794 1993",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  series = "Advances in Parallel Computing",
  acknowledgement = ack-nhfb,
  confsponsor = "ARCHIPEL; CNRS; Elsevier Sci. Publishers; IMAG; INPG; INRIA; et al",
  pubcountry = "Netherlands",
  xxeditor = "G. R. Joubert and D. Trystram and F. J. Peters and D. J. Evans",
}

%%% NOTE(review): the workshop acronym is spelled IWPP, agreeing with
%%% "(IWPP-94)" later in the same title; confirm against the title page.
@Proceedings{Kumar:1994:PPI,
  editor = "V. K. Prasanna Kumar",
  booktitle = "{Parallel processing: 1st IWPP: proceedings of the First International Workshop on Parallel Processing (IWPP-94), December 26--31, 1994, Bangalore, India}",
  title = "{Parallel processing: 1st IWPP: proceedings of the First International Workshop on Parallel Processing (IWPP-94), December 26--31, 1994, Bangalore, India}",
  publisher = "Tata McGraw-Hill Pub.
Co",
  address = "New Delhi, India",
  pages = "xxiii + 736",
  year = "1994",
  ISBN = "0-07-462332-X",
  ISBN-13 = "978-0-07-462332-9",
  LCCN = "QA 76.58 I587 1994",
  bibdate = "Tue May 12 08:53:36 1998",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}

@Proceedings{Miles:1994:PTO,
  editor = "Roger Miles and Alan Chalmers",
  booktitle = "{Progress in Transputer and occam Research, WoTUG-17 Proceedings of the 17th World occam and Transputer User Group Technical Meeting, April 10--13, 1994, Bristol, UK}",
  title = "{Progress in Transputer and occam Research, WoTUG-17 Proceedings of the 17th World occam and Transputer User Group Technical Meeting, April 10--13, 1994, Bristol, UK}",
  volume = "38",
  publisher = pub-IOS,
  address = pub-IOS:adr,
  pages = "vii + 221",
  year = "1994",
  ISBN = "90-5199-163-0",
  ISBN-13 = "978-90-5199-163-5",
  LCCN = "????",
  bibdate = "Sun Dec 22 10:20:45 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  series = "Transputer and Occam Engineering Series",
  acknowledgement = ack-nhfb,
  pubcountry = "Netherlands",
  sponsor = "World occam and Transputer User Group.",
}

@Proceedings{Ostrand:1994:PIS,
  editor = "Thomas Ostrand",
  booktitle = "{Proceedings of the 1994 International Symposium on Software Testing and Analysis (ISSTA): August 17--19, 1994, Seattle, Washington, USA}",
  title = "{Proceedings of the 1994 International Symposium on Software Testing and Analysis (ISSTA): August 17--19, 1994, Seattle, Washington, USA}",
  publisher = pub-ACM,
  address = pub-ACM:adr,
  year = "1994",
  CODEN = "SFENDP",
  ISBN = "0-89791-683-2",
  ISBN-13 = "978-0-89791-683-7",
  ISSN = "0163-5948",
  LCCN = "QA76.76.T48 I58 1994",
  bibdate = "Sun Dec 22 10:18:08 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  series = j-SIGSOFT,
  acknowledgement = ack-nhfb,
  fjournal = "ACM SIGSOFT Software Engineering Notes",
  issue = "spec. issue. p.
216-227",
  journal-URL = "https://dl.acm.org/citation.cfm?id=J728",
}

%%% NOTE(review): pages holds a short range ("402--409") although the
%%% entry describes a three-volume proceedings; it may have been copied
%%% from a single paper.  Left unchanged pending confirmation.
%%% The sponsor name carries its umlaut in the same TeX form as the
%%% first editor's given name.
@Proceedings{Pehrson:1994:IPP,
  editor = "Bj{\"o}rn Pehrson and Imre Simon and Klaus Brunnstein and Eckart Raubold and Karen Duncan and Karl Krueger",
  booktitle = "{Information processing '94: proceedings of the IFIP 13th World Computer Congress, Hamburg, Germany, 28 August--2 September, 1994}",
  title = "{Information processing '94: proceedings of the IFIP 13th World Computer Congress, Hamburg, Germany, 28 August--2 September, 1994}",
  volume = "A-51, A-52, A-53",
  publisher = pub-NH,
  address = pub-NH:adr,
  pages = "402--409",
  year = "1994",
  CODEN = "ITATEC",
  ISBN = "0-444-81990-8, 0-444-81989-4",
  ISBN-13 = "978-0-444-81990-1, 978-0-444-81989-5",
  ISSN = "0926-5473",
  LCCN = "QA75.5.I3785 1994",
  bibdate = "Sun Dec 22 10:18:08 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "Three volumes.",
  series = j-IFIP-TRANS-A,
  acknowledgement = ack-nhfb,
  pubcountry = "Netherlands",
  sponsor = "IFIP. Gesellschaft f{\"u}r Informatik.",
}

@Proceedings{Pierce:1994:PSH,
  editor = "P. Pierce and G. Regnier",
  booktitle = "{Proceedings of the Scalable High-Per\-for\-mance Computing Conference, May 23--25, 1994, Knoxville, Tennessee}",
  title = "{Proceedings of the Scalable High-Per\-for\-mance Computing Conference, May 23--25, 1994, Knoxville, Tennessee}",
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = "xviii + 852",
  year = "1994",
  ISBN = "0-8186-5680-8, 0-8186-5681-6",
  ISBN-13 = "978-0-8186-5680-4, 978-0-8186-5681-1",
  LCCN = "QA76.58.S32 1994",
  bibdate = "Thu Feb 29 17:59:11 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "IEEE catalog no. 94TH0637-9.",
  acknowledgement = ack-nhfb,
  sponsor = "IEEE Computer Society; Technical Committee on Supercomputing Applications.",
}

@Proceedings{Sall:1994:CIS,
  editor = "J. Sall and A.
Lehman",
  booktitle = "{Computational intensive statistical methods: 26th Symposium on the interface --- June 15--18, 1994, Research Triangle Park, NC, USA}",
  title = "{Computational intensive statistical methods: 26th Symposium on the interface --- June 15--18, 1994, Research Triangle Park, NC, USA}",
  volume = "26",
  publisher = "Interface Foundation of North America",
  address = "Fairfax Station, VA, USA",
  pages = "????",
  year = "1994",
  ISBN = "1-886658-00-5",
  ISBN-13 = "978-1-886658-00-4",
  LCCN = "QA276.4.S95 1994",
  bibdate = "Thu Feb 29 17:59:11 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  series = "Computing Science and Statistics Conference",
  acknowledgement = ack-nhfb,
  sponsor = "Interface Foundation of North America.",
}

%%% NOTE: the editor's surname is Siegel.  The citation key keeps its
%%% historical spelling so existing citations still resolve; that
%%% spelling is also kept in xxeditor.
%%% NOTE(review): the IEEE catalog number in note is identical to the
%%% one given for IEEE:1994:PSW (Supercomputing '94); confirm which of
%%% the two entries it belongs to.
@Proceedings{Siegal:1994:PEI,
  editor = "Howard Jay Siegel",
  booktitle = "{Proceedings / Eighth International Parallel Processing Symposium, April 26--29, 1994, Cancun, Mexico}",
  title = "{Proceedings / Eighth International Parallel Processing Symposium, April 26--29, 1994, Cancun, Mexico}",
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = "xxx + 966",
  year = "1994",
  ISBN = "0-8186-5602-6",
  ISBN-13 = "978-0-8186-5602-6",
  LCCN = "QA76.58.I58 1994",
  bibdate = "Sun Dec 22 10:18:08 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "IEEE catalog no. 94CH34819.",
  acknowledgement = ack-nhfb,
  confsponsor = "IEEE; ACM",
  xxeditor = "Howard Jay Siegal",
}

@Proceedings{Turchi:1994:SDA,
  editor = "Patrice E. A.
Turchi and Antonios Gonis",
  booktitle = "{Statics and dynamics of alloy phase transformations: Proceedings of a NATO Advanced Study Institute on Statics and Dynamics of Alloy Phase Transformations, held June 21--July 3, 1992, in Rhodes, Greece}",
  title = "{Statics and dynamics of alloy phase transformations: Proceedings of a NATO Advanced Study Institute on Statics and Dynamics of Alloy Phase Transformations, held June 21--July 3, 1992, in Rhodes, Greece}",
  volume = "319",
  publisher = pub-PLENUM,
  address = pub-PLENUM:adr,
  pages = "xiii + 737",
  year = "1994",
  ISBN = "0-306-44626-X",
  ISBN-13 = "978-0-306-44626-9",
  ISSN = "0258-1221",
  LCCN = "TN690.S77 1994",
  bibdate = "Thu Feb 29 17:59:11 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  series = "NATO ASI Series B Physics",
  acknowledgement = ack-nhfb,
}

@Proceedings{USENIX:1994:PFU,
  editor = "{USENIX}",
  booktitle = "{Proceedings of the First USENIX Symposium on Operating Systems Design and Implementation (OSDI), November 14--17, 1994, Monterey, California, USA}",
  title = "{Proceedings of the First USENIX Symposium on Operating Systems Design and Implementation (OSDI), November 14--17, 1994, Monterey, California, USA}",
  publisher = pub-USENIX,
  address = pub-USENIX:adr,
  pages = "280",
  year = "1994",
  ISBN = "1-880446-66-9",
  ISBN-13 = "978-1-880446-66-9",
  LCCN = "QA 76.76 O63 U87 1994",
  bibdate = "Sun Dec 22 10:20:45 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  confsponsor = "ACM; IEEE",
}

@Proceedings{Wagner:1994:CFD,
  editor = "S. (Siegfried) Wagner and J. (Jacques) Periaux and E. H.
(Ernst-Heinrich) Hirschel",
  booktitle = "{Computational fluid dynamics '94: proceedings of the Second European Computational Fluid Dynamics Conference, 5--8 September 1994, Stuttgart, Germany}",
  title = "{Computational fluid dynamics '94: proceedings of the Second European Computational Fluid Dynamics Conference, 5--8 September 1994, Stuttgart, Germany}",
  publisher = pub-WILEY,
  address = pub-WILEY:adr,
  pages = "xvi + 1029",
  year = "1994",
  ISBN = "0-471-95063-7",
  ISBN-13 = "978-0-471-95063-9",
  LCCN = "QA911.E95 1994",
  bibdate = "Thu Feb 29 17:59:11 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  sponsor = "European Committee on Computational Methods in Applied Sciences.",
}

@Proceedings{ACM:1995:PAS,
  editor = "{ACM}",
  booktitle = "{Proceedings of the 33rd annual southeast conference [ACM]: Clemson, South Carolina, March 17--18, 1995}",
  title = "{Proceedings of the 33rd annual southeast conference [ACM]: Clemson, South Carolina, March 17--18, 1995}",
  publisher = pub-ACM,
  address = pub-ACM:adr,
  pages = "290",
  year = "1995",
  ISBN = "0-89791-747-2",
  ISBN-13 = "978-0-89791-747-6",
  LCCN = "????",
  bibdate = "Wed Apr 16 13:28:48 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  confdate = "17--18 March 1995",
  conflocation = "Clemson, SC, USA",
  confsponsor = "ACM",
}

@Proceedings{ACM:1995:SAA,
  editor = "{ACM}",
  booktitle = "{SPAA '95, 7th Annual ACM Symposium on Parallel Algorithms and Architectures: July 17--19, 1995, Santa Barbara, CA, USA}",
  title = "{SPAA '95, 7th Annual ACM Symposium on Parallel Algorithms and Architectures: July 17--19, 1995, Santa Barbara, CA, USA}",
  volume = "7",
  publisher = pub-ACM,
  address = pub-ACM:adr,
  pages = "viii + 308",
  year = "1995",
  ISBN = "0-89791-717-0",
  ISBN-13 = "978-0-89791-717-9",
  LCCN = "QA76.642 .A25 1995",
  bibdate = "Sat Apr 19 16:34:54 MDT 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
conflocation = "Santa Barbara, CA, USA; 17-19 July 1995",
  conftitle = "Proceedings of Seventh Annual ACM Symposium on Parallel Algorithms and Architectures",
  corpsource = "California Inst. of Technol., Pasadena, CA, USA",
  sponsor = "ACM. Special Interest Group on Algorithms and Computation Theory ACM. Special Interest Group on Computer Architecture Theory ACM. Special Interest Group on Computer Architecture European Association for Theoretical Computer Science.",
  sponsororg = "ACM; EATCS",
  treatment = "P Practical",
}

@Proceedings{Agrawal:1995:PIW,
  editor = "D. P. Agrawal",
  booktitle = "{Proceedings of the 1995 ICPP Workshop on Challenges for Parallel Processing, August 14, 1995, Raleigh, NC, USA}",
  title = "{Proceedings of the 1995 ICPP Workshop on Challenges for Parallel Processing, August 14, 1995, Raleigh, NC, USA}",
  publisher = pub-CRC,
  address = pub-CRC:adr,
  pages = "vi + 162",
  year = "1995",
  ISBN = "0-8493-2618-4",
  ISBN-13 = "978-0-8493-2618-9",
  LCCN = "QA76.58.I34 1995",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  confsponsor = "Pennsylvania State Univ",
}

@Proceedings{Aityan:1995:PFI,
  editor = "S. K. Aityan and L. T. Grujic and R. J. Hathaway and G. S. Ladde and N. Medhin and M.
Sambandham",
  booktitle = "{Proceedings of the First International Conference on Neural, Parallel and Scientific Computations held at Morehouse College, Atlanta, USA, May 28--31, 1995}",
  title = "{Proceedings of the First International Conference on Neural, Parallel and Scientific Computations held at Morehouse College, Atlanta, USA, May 28--31, 1995}",
  publisher = "Dynamic Publishers",
  address = "Atlanta, GA, USA",
  pages = "xi + 506",
  year = "1995",
  ISBN = "0-9640398-9-3 (hardback), 0-9640398-8-5 (paperback)",
  ISBN-13 = "978-0-9640398-9-6 (hardback), 978-0-9640398-8-9 (paperback)",
  LCCN = "QA76.87 .I58 1995",
  bibdate = "Wed Apr 16 13:17:34 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  series = "Proceedings of Neural Parallel and Scientific Computations 1995",
  acknowledgement = ack-nhfb,
}

@Proceedings{Alnuweiri:1995:PHF,
  editor = "Hussein M. Alnuweiri and Mounir Hamdi",
  booktitle = "{Proceedings of HiNet '95: first international workshop on high-speed network computing, April 25, 1995, Santa Barbara, California}",
  title = "{Proceedings of HiNet '95: first international workshop on high-speed network computing, April 25, 1995, Santa Barbara, California}",
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = "vii + 119",
  year = "1995",
  ISBN = "0-8186-7124-6",
  ISBN-13 = "978-0-8186-7124-1",
  LCCN = "TK5105.5 .H56 1995",
  bibdate = "Thu Feb 29 17:59:11 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  sponsor = "IEEE.",
}

@Proceedings{Anonymous:1995:CCS,
  editor = "Anonymous",
  booktitle = "{3rd CLIPS conference --- September 1994, Houston, TX}",
  title = "{3rd CLIPS conference --- September 1994, Houston, TX}",
  publisher = pub-NASA,
  address = pub-NASA:adr,
  pages = "????",
  year = "1995",
  ISBN = "????",
  ISBN-13 = "????",
  LCCN = "????",
  bibdate = "Thu Feb 29 17:59:11 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  series = "NASA Publications N N95-19625-647, N95-19747-768",
acknowledgement = ack-nhfb,
  sponsor = "United States. National Aeronautics and Space Administration.",
}

@Proceedings{Anonymous:1995:RSS,
  editor = "Anonymous",
  booktitle = "{Reservoir simulation: 13th Symposium --- February 1995, San Antonio, TX}",
  title = "{Reservoir simulation: 13th Symposium --- February 1995, San Antonio, TX}",
  publisher = pub-SPE,
  address = pub-SPE:adr,
  pages = "????",
  year = "1995",
  ISBN = "????",
  ISBN-13 = "????",
  LCCN = "????",
  bibdate = "Thu Feb 29 17:59:11 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  series = "Papers --- Society of Petroleum Engineers of AIME",
  acknowledgement = ack-nhfb,
  sponsor = "American Institute of Mechanical Engineers; Society of Petroleum Engineers.",
}

@Proceedings{ANS:1995:MCR,
  editor = "{ANS}",
  booktitle = "{Mathematics and computations, reactor physics, and environmental analyses: International conference --- April 1995, Portland, OR}",
  title = "{Mathematics and computations, reactor physics, and environmental analyses: International conference --- April 1995, Portland, OR}",
  publisher = "American Nuclear Society",
  address = "La Grange Park, IL, USA",
  pages = "xvi + 1597",
  year = "1995",
  ISBN = "0-89448-198-3",
  ISBN-13 = "978-0-89448-198-7",
  LCCN = "TK9006.M37 1995",
  bibdate = "Wed Aug 14 09:02:28 MDT 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "Two volumes.",
  acknowledgement = ack-nhfb,
  sponsor = "American Nuclear Society; Mathematics and Computation Division.",
  xxeditor-1 = "A. Qaddouri and R. Roy and B. Goulard",
  xxeditor-2 = "Z.
Stankovski",
}

@Proceedings{Arabnia:1995:TRA,
  editor = "Hamid Arabnia",
  booktitle = "{Transputer research and applications 7: American Transputer Users Group, October 23--25, 1994, Atlanta, GA (NATUG-7)}",
  title = "{Transputer research and applications 7: American Transputer Users Group, October 23--25, 1994, Atlanta, GA (NATUG-7)}",
  volume = "42",
  publisher = pub-IOS,
  address = pub-IOS:adr,
  pages = "ix + 349",
  year = "1995",
  ISBN = "90-5199-187-8 (IOS Press), 4-274-90017-7 (Ohmsha)",
  ISBN-13 = "978-90-5199-187-1 (IOS Press), 978-4-274-90017-4 (Ohmsha)",
  ISSN = "0925-4986",
  LCCN = "????",
  bibdate = "Mon Jan 15 18:41:48 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  series = "Transputer and occam engineering series",
  acknowledgement = ack-nhfb,
}

%%% The second editor's surname is written with its Norwegian slashed o
%%% as a braced TeX control sequence, so BibTeX treats it as one letter.
@Proceedings{Bailey:1995:PSS,
  editor = "D. H. Bailey and P. E. Bj{\o}rstad and J. R. Gilbert and M. V. Mascagni and R. S. Schreiber and H. D. Simon and V. J. Torczon and L. T. Watson",
  booktitle = "{Proceedings of the Seventh SIAM Conference on Parallel Processing for Scientific Computing (San Francisco, CA, USA)}",
  title = "{Proceedings of the Seventh SIAM Conference on Parallel Processing for Scientific Computing (San Francisco, CA, USA)}",
  publisher = pub-SIAM,
  address = pub-SIAM:adr,
  pages = "xviii + 875",
  year = "1995",
  ISBN = "0-89871-344-7",
  ISBN-13 = "978-0-89871-344-2",
  LCCN = "QA76.58.S55 1995",
  bibdate = "Sat Apr 19 16:34:54 MDT 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  confdate = "15--17 Feb. 1995",
  conflocation = "San Francisco, CA, USA; 15-17 Feb. 1995",
  conftitle = "Proceedings of the Seventh SIAM Conference on Parallel Processing for Scientific Computing",
  corpsource = "Div. of Math. and Comput. Sci., Argonne Nat.
Lab., IL, USA",
  sponsor = "Society for Industrial and Applied Mathematics.",
  treatment = "P Practical",
}

@Proceedings{Bernardi:1995:CCE,
  editor = "Francesco Bernardi and Jean-Louis Rivail",
  booktitle = "{Computational chemistry: 1st European conference on computational chemistry (May 1994, Nancy, France)}",
  title = "{Computational chemistry: 1st European conference on computational chemistry (May 1994, Nancy, France)}",
  number = "330",
  publisher = pub-AIP,
  address = pub-AIP:adr,
  pages = "various",
  year = "1995",
  ISBN = "1-56396-457-0",
  ISBN-13 = "978-1-56396-457-2",
  ISSN = "0094-243X (print), 1551-7616 (electronic), 1935-0465",
  LCCN = "QD39.3.E46 E15 1995",
  bibdate = "Thu Feb 29 17:59:11 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  series = "AIP Conference Proceedings",
  acknowledgement = ack-nhfb,
  sponsor = "Federation of European Chemical Societies.",
}

%%% booktitle and title are identical, both using the em-dash form
%%% found in the other "conference --- month year" titles in this file.
%%% The address is the publisher's city.
@Proceedings{Bilger:1995:AFM,
  editor = "R. W. Bilger",
  booktitle = "{12th Australasian fluid mechanics conference: --- December 1995, Sydney, Australia}",
  title = "{12th Australasian fluid mechanics conference: --- December 1995, Sydney, Australia}",
  publisher = "University of Sydney",
  address = "Sydney, Australia",
  pages = "????",
  year = "1995",
  ISBN = "0-86934-034-4",
  ISBN-13 = "978-0-86934-034-9",
  LCCN = "????",
  bibdate = "Wed Aug 14 09:02:28 MDT 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  series = "Australasian Fluid Mechanics Conference 1995; EDIT 12//V2",
  acknowledgement = ack-nhfb,
  sponsor = "University of Sydney.",
}

%%% NOTE(review): the LCCN is given in class QA76.9.C65, the class also
%%% used by Hamza:1995:PII (a simulation conference); the value
%%% "A76.9.C65E966 1995" appears to have lost its leading letter.
%%% Confirm against the catalogue record.
@Proceedings{Breitenecker:1995:ESC,
  editor = "Felix Breitenecker and Irmgard Husinsky",
  booktitle = "{EUROSIM '95: simulation congress: proceedings of the EUROSIM Conference, EUROSIM '95, Vienna, Austria, 11--15 September 1995}",
  title = "{EUROSIM '95: simulation congress: proceedings of the EUROSIM Conference, EUROSIM '95, Vienna, Austria, 11--15 September 1995}",
  publisher = pub-ELS,
  address = pub-ELS:adr,
  pages = "xxii + 1356",
  year = "1995",
  ISBN = "0-444-82241-0",
  ISBN-13 = "978-0-444-82241-3",
  LCCN = "QA76.9.C65E966 1995",
  bibdate = "Thu Feb 29 17:59:11 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  pubcountry = "Netherlands",
  sponsor = "Federation of the European Simulation Societies.",
}

@Proceedings{Cantoni:1995:CCA,
  editor = "Virginio Cantoni and L. Lombardi and M. Mosconi and M. Savini and A. Setti",
  booktitle = "{CAMP '95, computer architectures for machine perception: proceedings, September 18--20, 1995, Como, Italy}",
  title = "{CAMP '95, computer architectures for machine perception: proceedings, September 18--20, 1995, Como, Italy}",
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = "x + 461",
  year = "1995",
  ISBN = "0-8186-7134-3",
  ISBN-13 = "978-0-8186-7134-0",
  LCCN = "QA76.9.A73W675 1995",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "IEEE catalog no. 95TB8093.",
  acknowledgement = ack-nhfb,
  confsponsor = "Pavia Univ. --- Dipt. Inf. Sistemistica Centro di Cultura Sci. `A. Volta'; IEEE Comput. Soc. Tech. Committee on Comput. Archit.; IEEE Comput. Soc. Tech. Committee on PAMI; ACM SIGART/SIGARCH; Int. Assoc. Pattern Recognition",
}

@Proceedings{Cook:1995:TAS,
  editor = "B. M. Cook and M. R. Jane and P. Nixon and P. M. Welch",
  booktitle = "{Transputer Applications and Systems '95. Proceedings of the 1995 World Transputer Congress, 4--6 September 1995, Harrogate, North Yorkshire, UK}",
  title = "{Transputer Applications and Systems '95.
Proceedings of the 1995 World Transputer Congress, 4--6 September 1995, Harrogate, North Yorkshire, UK}",
  publisher = pub-IOS,
  address = pub-IOS:adr,
  pages = "614",
  year = "1995",
  ISBN = "90-5199-235-1 (IOS Press), 4-274-90062-2 (Ohmsha)",
  ISBN-13 = "978-90-5199-235-9 (IOS Press), 978-4-274-90062-4 (Ohmsha)",
  LCCN = "????",
  bibdate = "Wed Apr 16 12:07:36 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}

@Proceedings{Dongarra:1995:HPC,
  editor = "J. J. Dongarra and others",
  booktitle = "{High performance computing: technology, methods, and applications (Advanced workshop, June 1994, Cetraro, Italy)}",
  title = "{High performance computing: technology, methods, and applications (Advanced workshop, June 1994, Cetraro, Italy)}",
  volume = "10",
  publisher = pub-ELS,
  address = pub-ELS:adr,
  pages = "viii + 427",
  year = "1995",
  ISBN = "0-444-82163-5",
  ISBN-13 = "978-0-444-82163-8",
  ISSN = "0927-5452",
  LCCN = "QA76.88.H55 1995",
  bibdate = "Thu Feb 29 17:59:11 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  series = "Advances in Parallel Computing",
  acknowledgement = ack-nhfb,
}

@Proceedings{El-Rewini:1995:PTE,
  editor = "H. El-Rewini and B. D. Shriver",
  booktitle = "{Proceedings of the Twenty-Eighth Hawaii International Conference on System Sciences}",
  title = "{Proceedings of the Twenty-Eighth Hawaii International Conference on System Sciences}",
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = "x + 361 (vol. 1), xv + 762 (vol. 2), xv + 600 (vol. 3), xx + 1042 (vol. 4), x + 362 (vol. 5)",
  year = "1995",
  ISBN = "0-8186-6935-7",
  ISBN-13 = "978-0-8186-6935-4",
  LCCN = "????",
  bibdate = "Sun Dec 22 10:20:45 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  confdate = "3--6 Jan. 1995",
  conflocation = "Wailea, HI, USA",
  confsponsor = "Univ. Hawaii; Univ. Hawaii Coll. Bus. Admin.; IEEE Comput.
Soc.; ACM; PRISM",
}

%%% The publisher name spells "Institute" in full.
@Proceedings{Ferenczi:1995:PAH,
  editor = "Szabolcs Ferenczi and Peter Kacsuk",
  booktitle = "{Proceedings of the 2nd Austrian-Hungarian Workshop on Transputer Applications: September 29--October 1, 1994, Budapest, Hungary}",
  title = "{Proceedings of the 2nd Austrian-Hungarian Workshop on Transputer Applications: September 29--October 1, 1994, Budapest, Hungary}",
  publisher = "Hungarian Academy of Sciences, Central Research Institute for Physics",
  address = "Budapest, Hungary",
  pages = "vii + 282",
  year = "1995",
  ISBN = "????",
  ISBN-13 = "????",
  LCCN = "????",
  bibdate = "Wed Apr 16 13:32:12 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "Technical report KFKI-1995-2/M,N.",
  acknowledgement = ack-nhfb,
}

%%% The conference date reads "September 4--6, 1995" (no comma after
%%% the month); the second editor's given name carries its accent.
@Proceedings{Ferreira:1995:PAI,
  editor = "Afonso Ferreira and Jos{\'e} Rolim",
  booktitle = "{Parallel algorithms for irregularly structured problems: second international workshop, IRREGULAR 95, Lyon, France, September 4--6, 1995: proceedings}",
  title = "{Parallel algorithms for irregularly structured problems: second international workshop, IRREGULAR 95, Lyon, France, September 4--6, 1995: proceedings}",
  publisher = pub-SV,
  address = pub-SV:adr,
  pages = "x + 409",
  year = "1995",
  ISBN = "3-540-60321-2",
  ISBN-13 = "978-3-540-60321-4",
  LCCN = "QA76.642.I59 1995",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  confsponsor = "IFIP",
  pubcountry = "Germany",
}

@Proceedings{Fritzson:1995:PPA,
  editor = "Peter Fritzson and Leif Finmo",
  booktitle = "{Parallel programming and applications: proceedings of the Workshop on Parallel Programming and Computation (ZEUS '95) and the 4th Nordic Transputer Conference (NTUG '95): Link{\"o}ping, Sweden}",
  title = "{Parallel programming and applications: proceedings of the Workshop on Parallel Programming and Computation (ZEUS '95) and the 4th Nordic Transputer Conference (NTUG '95): Link{\"o}ping, Sweden}",
publisher = pub-IOS,
  address = pub-IOS:adr,
  pages = "ix + 435",
  year = "1995",
  ISBN = "90-5199-229-7 (IOS Press), 4-274-90056-8 (Ohmsha)",
  ISBN-13 = "978-90-5199-229-8 (IOS Press), 978-4-274-90056-3 (Ohmsha)",
  LCCN = "????",
  bibdate = "Wed Apr 16 13:23:58 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}

@Proceedings{Gates:1995:PFI,
  editor = "W. Lawrence (William Lawrence) Gates",
  booktitle = "{Proceedings of the First International AMIP Scientific Conference: Monterey, California, USA, 15--19 May 1995}",
  title = "{Proceedings of the First International AMIP Scientific Conference: Monterey, California, USA, 15--19 May 1995}",
  number = "732",
  publisher = "World Meteorological Organization",
  address = "Geneva, Switzerland",
  pages = "viii + 532",
  year = "1995",
  ISBN = "????",
  ISBN-13 = "????",
  LCCN = "SIO 1 WO326 v.92",
  bibdate = "Wed Aug 14 09:02:28 MDT 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  series = "World Meteorological Organization --- Publications --- WMO TD 1995",
  acknowledgement = ack-nhfb,
  sponsor = "Atmospheric Model Intercomparison Project.",
}

%%% The conference city is Wollongong, NSW.
@Proceedings{Gray:1995:PCT,
  editor = "J. P. Gray and F. Naghdy",
  booktitle = "{Parallel Computing: Technology and Practice. PCAT-94. Proceedings of the 7th Australian Transputer and Occam User Group Conference: Wollongong, NSW, Australia, 8--9 November 1994}",
  title = "{Parallel Computing: Technology and Practice. PCAT-94. Proceedings of the 7th Australian Transputer and Occam User Group Conference: Wollongong, NSW, Australia, 8--9 November 1994}",
  publisher = pub-IOS,
  address = pub-IOS:adr,
  pages = "vii + 300",
  year = "1995",
  ISBN = "????",
  ISBN-13 = "????",
  LCCN = "????",
  bibdate = "Wed Apr 16 12:10:49 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}

@Proceedings{Grinstein:1995:VDE,
  editor = "Georges G. Grinstein and Robert F.
Erbacher",
  booktitle = "{Visual data exploration and analysis II: 8--10 February 1995, San Jose, California}",
  title = "{Visual data exploration and analysis II: 8--10 February 1995, San Jose, California}",
  volume = "2410",
  publisher = pub-SPIE,
  address = pub-SPIE:adr,
  pages = "viii + 482",
  year = "1995",
  CODEN = "PSISDG",
  ISBN = "0-8194-1757-2",
  ISBN-13 = "978-0-8194-1757-2",
  ISSN = "0277-786X (print), 1996-756X (electronic)",
  LCCN = "TS510.S63 v.2410",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  series = j-PROC-SPIE,
  acknowledgement = ack-nhfb,
  confsponsor = "SPIE",
}

%%% NOTE(review): the publisher prefix is spelled IASTED, matching the
%%% organisation named in the title; confirm the exact imprint wording.
@Proceedings{Hamza:1995:PII,
  editor = "M. H. Hamza",
  booktitle = "{Proceedings of the IASTED International Conference. Modelling and Simulation: Pittsburgh, PA, USA, 27--29 April 1995}",
  title = "{Proceedings of the IASTED International Conference. Modelling and Simulation: Pittsburgh, PA, USA, 27--29 April 1995}",
  publisher = "IASTED-Acta Press",
  address = "Anaheim, CA, USA",
  pages = "598",
  year = "1995",
  ISBN = "0-88986-218-4",
  ISBN-13 = "978-0-88986-218-0",
  LCCN = "QA76.9.C65 I295 1995",
  bibdate = "Fri Feb 01 06:58:29 2002",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}

@Proceedings{Haridi:1995:EPP,
  editor = "Seif Haridi and Khayri Ali and Peter Magnusson",
  booktitle = "{EURO-PAR '95 parallel processing: First International EURO PAR Conference, Stockholm, Sweden, August 29--31, 1995: proceedings}",
  title = "{EURO-PAR '95 parallel processing: First International EURO PAR Conference, Stockholm, Sweden, August 29--31, 1995: proceedings}",
  number = "966",
  publisher = pub-SV,
  address = pub-SV:adr,
  pages = "xv + 730",
  year = "1995",
  ISBN = "3-540-60247-X",
  ISBN-13 = "978-3-540-60247-7",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  LCCN = "QA76.58.I553 1995",
  bibdate = "Sat Apr 19 16:34:54 MDT 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  series = ser-LNCS,
  acknowledgement
= ack-nhfb, corpsource = "Centro Svizzero de Calcolo Sci., Eidgenossische Tech. Hochschule, Manno, Switzerland", pubcountry = "Germany", sponsor = "Swedish Institute of Computer Science.", treatment = "P Practical", } @Proceedings{Hassanzadeh:1995:MMG, editor = "Siamak Hassanzadeh", booktitle = "{Mathematical methods in geophysical imaging III: 12--13 July 1995, San Diego, California}", title = "{Mathematical methods in geophysical imaging III: 12--13 July 1995, San Diego, California}", volume = "2571", publisher = pub-SPIE, address = pub-SPIE:adr, pages = "vii + 240", year = "1995", CODEN = "PSISDG", ISBN = "0-8194-1930-3", ISBN-13 = "978-0-8194-1930-9", ISSN = "0277-786X (print), 1996-756X (electronic)", LCCN = "TS510.S63 v.2571", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = j-PROC-SPIE, acknowledgement = ack-nhfb, confsponsor = "SPIE", } @Proceedings{Hertzberger:1995:HPM, editor = "Bob Hertzberger and Giuseppe Serazzi", booktitle = "{High-Per\-for\-mance computing and networking: International Conference and Exhibition, Milan, Italy, May 3--5, 1995: proceedings}", title = "{High-Per\-for\-mance computing and networking: International Conference and Exhibition, Milan, Italy, May 3--5, 1995: proceedings}", number = "919", publisher = pub-SV, address = pub-SV:adr, pages = "xxiv + 957", year = "1995", ISBN = "3-540-59393-4", ISBN-13 = "978-3-540-59393-5", ISSN = "0302-9743 (print), 1611-3349 (electronic)", LCCN = "QA76.88 .I57 1995", bibdate = "Thu Feb 29 17:59:11 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = ser-LNCS, acknowledgement = ack-nhfb, sponsor = "High Performance Computing and Networking Foundation.", } @Proceedings{Hoffmann:1995:CAP, editor = "Geerd-R. 
Hoffmann and Norbert Kreitz", booktitle = "{Coming of age: proceedings of the Sixth ECMWF Workshop on the Use of Parallel Processors in Meteorology, Reading, UK, November 21--25, 1994}", title = "{Coming of age: proceedings of the Sixth ECMWF Workshop on the Use of Parallel Processors in Meteorology, Reading, UK, November 21--25, 1994}", publisher = pub-WORLD-SCI, address = pub-WORLD-SCI:adr, pages = "x + 568", year = "1995", ISBN = "981-02-2211-4", ISBN-13 = "978-981-02-2211-6", LCCN = "QC866.E26 1994", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, pubcountry = "Singapore", } @Proceedings{IEEE:1995:CPI, editor = "{IEEE}", booktitle = "{Conference proceedings of the 1995 IEEE Fourteenth Annual International Phoenix Conference on Computers and Communications: Scottsdale, Arizona, USA, March 28--31, 1995}", title = "{Conference proceedings of the 1995 IEEE Fourteenth Annual International Phoenix Conference on Computers and Communications: Scottsdale, Arizona, USA, March 28--31, 1995}", volume = "14", publisher = pub-IEEE, address = pub-IEEE:adr, pages = "xvii + 742", year = "1995", ISBN = "0-7803-2493-5, 0-7803-2492-7, 0-7803-2494-3", ISBN-13 = "978-0-7803-2493-0, 978-0-7803-2492-3, 978-0-7803-2494-7", LCCN = "TK7885.A1 I567 1995", bibdate = "Thu Feb 29 17:59:11 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "IEEE catalog no. 
95CH35751.",
  acknowledgement = ack-nhfb,
  sponsor = "IEEE.",
}

@Proceedings{IEEE:1995:DPT,
  editor = "{IEEE}",
  booktitle = "{Digest of papers / the Twenty-fifth International Symposium on Fault-Tolerant Computing, June 27--30, 1995, Pasadena, California}",
  title = "{Digest of papers / the Twenty-fifth International Symposium on Fault-Tolerant Computing, June 27--30, 1995, Pasadena, California}",
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = "xxiii + 547",
  year = "1995",
  ISBN = "0-8186-7079-7",
  ISBN-13 = "978-0-8186-7079-4",
  LCCN = "QA 76.9 F38 I57 1995",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "IEEE catalog no. 95CB35823.",
  acknowledgement = ack-nhfb,
  confsponsor = "IEEE Comput. Soc. Tech. Committee on Fault-Tolerant Comput.; LAAS-CNRS, France; Univ. Illinois at Urbana-Champaign; Univ. California at Los Angeles; Jet Propulsion Lab.; IFIP WG 10.4",
}

@Proceedings{IEEE:1995:IIC,
  editor = "{IEEE}",
  booktitle = "{1995 IEEE International Conference on Systems, Man, and Cybernetics: intelligent systems for the 21st century: Vancouver, British Columbia, Canada, October 22--25, 1995}",
  title = "{1995 IEEE International Conference on Systems, Man, and Cybernetics: intelligent systems for the 21st century: Vancouver, British Columbia, Canada, October 22--25, 1995}",
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = "4711",
  year = "1995",
  ISBN = "0-7803-2559-1",
  ISBN-13 = "978-0-7803-2559-3",
  LCCN = "TA168.I19 1995",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "Five volumes. IEEE catalog no.
95CH3576-7.", acknowledgement = ack-nhfb, } @Proceedings{IEEE:1995:IPR, editor = "{IEEE}", booktitle = "{IEEE Pacific Rim Conference on Communications, Computers, and Signal Processing: proceedings / May 17--19, 1995, Victoria Conference Centre, Victoria, British Columbia, Canada}", title = "{IEEE Pacific Rim Conference on Communications, Computers, and Signal Processing: proceedings / May 17--19, 1995, Victoria Conference Centre, Victoria, British Columbia, Canada}", publisher = pub-IEEE, address = pub-IEEE:adr, pages = "xiv + 638", year = "1995", ISBN = "0-7803-2553-2", ISBN-13 = "978-0-7803-2553-1", LCCN = "TK 5101 A1 I34 1995", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "IEEE catalog no. 95CH35765.", acknowledgement = ack-nhfb, confsponsor = "IEEE Victoria Sect.; IEEE Canada; Dept. Comput. Sci. and the Fac. Eng., Univ. Victoria", } @Proceedings{IEEE:1995:ISE, editor = "{IEEE}", booktitle = "{Ideas in Science and Electronics Exposition and Symposium. Proceedings: Albuquerque, NM, USA, 9--11 May 1995}", title = "{Ideas in Science and Electronics Exposition and Symposium. 
Proceedings: Albuquerque, NM, USA, 9--11 May 1995}", volume = "17", publisher = pub-IEEE, address = pub-IEEE:adr, pages = "64", year = "1995", ISBN = "????", ISBN-13 = "????", LCCN = "????", bibdate = "Thu Feb 29 17:59:11 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = "Annual Ideas in Science and Electronics Exposition and Symposium Conference", acknowledgement = ack-nhfb, sponsor = "IEEE.", } @Proceedings{IEEE:1995:PEW, editor = "{IEEE}", booktitle = "{Proceedings: Euromicro Workshop on Parallel and Distributed Processing, San Remo, Italy, January 25--27, 1995}", title = "{Proceedings: Euromicro Workshop on Parallel and Distributed Processing, San Remo, Italy, January 25--27, 1995}", publisher = pub-IEEE, address = pub-IEEE:adr, pages = "xiii + 582", year = "1995", ISBN = "0-8186-7031-2, 0-8186-7032-0", ISBN-13 = "978-0-8186-7031-2, 978-0-8186-7032-9", LCCN = "QA76.58 .E97 1995", bibdate = "Wed Aug 14 09:02:28 MDT 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = "Euromicro Workshop on Parallel and Distributed Processing 1995; 3rd", acknowledgement = ack-nhfb, xxeditor1 = "I. Martin and J. C. Fabero and F. Tirado and A. Bautista", xxeditor2 = "V. Gianuzzi and F. Merani", } @Proceedings{IEEE:1995:PFI, editor = "{IEEE}", booktitle = "{Proceedings of the Fourth IEEE International Symposium on High Performance Distributed Computing, August 2--4, 1995, Washington, DC, USA}", title = "{Proceedings of the Fourth IEEE International Symposium on High Performance Distributed Computing, August 2--4, 1995, Washington, DC, USA}", publisher = pub-IEEE, address = pub-IEEE:adr, pages = "xiv + 246", year = "1995", ISBN = "0-8186-7088-6", ISBN-13 = "978-0-8186-7088-6", LCCN = "QA76.9.D5 I328 1995", bibdate = "Thu Feb 29 17:59:11 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "IEEE catalog no. 95TB8075.", acknowledgement = ack-nhfb, confsponsor = "IEEE Tech. Committee on Distrib. 
Process.; Northeast Parallel Architectures Centre (NPAC) at Syracuse Univ.; ACM SIGCOMM; Rome Lab", sponsor = "IEEE. Computer Society. Technical Committee on Distributed Processing Northeast Parallel Architectures Center.", } @Proceedings{IEEE:1995:PIC, editor = "{IEEE}", booktitle = "{Proceedings of the 15th International Conference on Distributed Computing Systems: Vancouver, BC, Canada, 30 May--2 June 1995}", title = "{Proceedings of the 15th International Conference on Distributed Computing Systems: Vancouver, BC, Canada, 30 May--2 June 1995}", publisher = pub-IEEE, address = pub-IEEE:adr, pages = "xx + 537", year = "1995", ISBN = "0-8186-7025-8", ISBN-13 = "978-0-8186-7025-1", LCCN = "????", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "IEEE catalog number 95CH35784.", acknowledgement = ack-nhfb, corpsource = "IBM Thomas J. Watson Res. Center, Yorktown Heights, NY, USA", sponsororg = "IEEE Comput. Soc. Tech. Committee on Distributed Process", treatment = "A Application; P Practical", } @Proceedings{IEEE:1995:PIP, editor = "{IEEE}", booktitle = "{Proceedings / 9th International Parallel Processing Symposium, April 25--28, 1995, Santa Barbara, California}", title = "{Proceedings / 9th International Parallel Processing Symposium, April 25--28, 1995, Santa Barbara, California}", publisher = pub-IEEE, address = pub-IEEE:adr, pages = "xxiii + 851", year = "1995", ISBN = "0-8186-7074-6", ISBN-13 = "978-0-8186-7074-9", LCCN = "QA 76.58 I56 1995", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "IEEE catalog no. 95TH8052.", acknowledgement = ack-nhfb, confsponsor = "IEEE Comput. Soc. Tech. 
Committee on Parallel Process", } @Proceedings{IEEE:1995:PNA, editor = "{IEEE}", booktitle = "{Proceedings: the nineteenth annual International Computer Software and Applications Conference (COMPSAC '95): August 9--11, 1995, Dallas, Texas}", title = "{Proceedings: the nineteenth annual International Computer Software and Applications Conference (COMPSAC '95): August 9--11, 1995, Dallas, Texas}", publisher = pub-IEEE, address = pub-IEEE:adr, pages = "xviii + 431", year = "1995", ISBN = "0-8186-7119-X", ISBN-13 = "978-0-8186-7119-7", LCCN = "QA 76.6 C6295 1995", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "IEEE catalog no. 95CB35838.", acknowledgement = ack-nhfb, confsponsor = "IEEE Comput. Soc", } @Proceedings{IEEE:1995:PSI, editor = "{IEEE}", booktitle = "{Proceedings / Seventh IEEE Symposium on Parallel and Distributed Processing, October 25--28, 1995, San Antonio, Texas}", title = "{Proceedings / Seventh IEEE Symposium on Parallel and Distributed Processing, October 25--28, 1995, San Antonio, Texas}", publisher = pub-IEEE, address = pub-IEEE:adr, pages = "xvii + 724", year = "1995", ISBN = "0-8186-7195-5", ISBN-13 = "978-0-8186-7195-1", LCCN = "QA 76.58 I42 1995", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "IEEE catalog number 95TB8131.", acknowledgement = ack-nhfb, conflocation = "San Antonio, TX, USA; 25-28 Oct. 1995", confsponsor = "IEEE Comput Soc. Tech. Committee on Comput. Architecture; IEEE Comput. Soc. Tech. Committee on Distributed Process.; IEEE Comput. Soc. Dallas Chapter", conftitle = "Proceedings of Seventh IEEE Symposium on Parallel and Distributed Processing", corpsource = "Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL, USA", sponsororg = "IEEE Comput Soc. Tech. Committee on Comput. Architecture; IEEE Comput. Soc. Tech. Committee on Distributed Process.; IEEE Comput. Soc. 
Dallas Chapter", treatment = "P Practical", } @Proceedings{IEEE:1995:PSP, editor = "{IEEE}", booktitle = "{Proceedings of the 1994 Scalable Parallel Libraries Conference: October 12--14, 1994, Mississippi State University, Mississippi}", title = "{Proceedings of the 1994 Scalable Parallel Libraries Conference: October 12--14, 1994, Mississippi State University, Mississippi}", publisher = pub-IEEE, address = pub-IEEE:adr, pages = "vii + 191", year = "1995", ISBN = "0-8186-6895-4", ISBN-13 = "978-0-8186-6895-1", LCCN = "QA76.58 .S34 1994", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, conflocation = "Mississippi State, MS, USA; 12-14 Oct. 1994", confsponsor = "Mississippi State Univ.; NSF", conftitle = "Proceedings Scalable Parallel Libraries Conference", corpsource = "Sch. of Comput. Sci., Carnegie Mellon Univ., Pittsburgh, PA, USA", sponsororg = "Mississippi State Univ.; NSF", treatment = "P Practical", } @Proceedings{IFIP:1995:KWC, editor = "{IFIP Working Group 2.5}", booktitle = "{Kyoto Workshop 1995: Current Directions in Numerical Software and High Performance Computing, 19--20 October 1995, Kyoto, Japan}", title = "{Kyoto Workshop 1995: Current Directions in Numerical Software and High Performance Computing, 19--20 October 1995, Kyoto, Japan}", publisher = "????", address = "????", pages = "????", year = "1995", ISBN = "????", ISBN-13 = "????", LCCN = "????", bibdate = "Wed Jan 24 06:55:27 2001", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.nsc.liu.se/~boein/ifip/kyoto/kyoto.html#reid; http://www.nsc.liu.se/~boein/ifip/kyoto/workshop-info/proceedings/", acknowledgement = ack-nhfb, } @Proceedings{Levelt:1995:IIS, editor = "A. H. M. 
Levelt", booktitle = "{ISSAC '95: International symposium on symbolic and algebraic computation --- July 10--12, 1995, Montr{\'e}al, Canada}", title = "{ISSAC '95: International symposium on symbolic and algebraic computation --- July 10--12, 1995, Montr{\'e}al, Canada}", publisher = pub-ACM, address = pub-ACM:adr, pages = "xviii + 314", year = "1995", ISBN = "0-89791-699-9", ISBN-13 = "978-0-89791-699-8", LCCN = "QA 76.95 I59 1995", bibdate = "Thu Feb 29 17:59:11 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = "ISSAC --- Proceedings", acknowledgement = ack-nhfb, } @Proceedings{Malyshkin:1995:PCT, editor = "Victor Malyshkin", booktitle = "{Parallel computing technologies: third international conference, PaCT-95, St. Petersburg, Russia, September 12--25, 1995: proceedings}", title = "{Parallel computing technologies: third international conference, PaCT-95, St. Petersburg, Russia, September 12--25, 1995: proceedings}", number = "964", publisher = pub-SV, address = pub-SV:adr, pages = "xii + 495", year = "1995", ISBN = "3-540-60222-4", ISBN-13 = "978-3-540-60222-4", ISSN = "0302-9743 (print), 1611-3349 (electronic)", LCCN = "QA76.58.I547 1995", bibdate = "Thu Feb 29 17:59:11 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = ser-LNCS, acknowledgement = ack-nhfb, sponsor = "Russian Academy of Sciences. Computing Center Electrotechnical University of St. Petersburg.", } @Proceedings{Nadeau:1995:SVR, editor = "David R. Nadeau and John L. 
Moreland", booktitle = "{1995 Symposium on the Virtual Reality Modeling Language, VRML '95, San Diego, California, December 14--15, 1995}", title = "{1995 Symposium on the Virtual Reality Modeling Language, VRML '95, San Diego, California, December 14--15, 1995}", publisher = pub-ACM, address = pub-ACM:adr, pages = "139", year = "1995", ISBN = "0-89791-818-5", ISBN-13 = "978-0-89791-818-3", LCCN = "QA76.76.H94 S95 1995", bibdate = "Fri Sep 11 08:29:11 MDT 1998", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "ACM order number 434953.", acknowledgement = ack-nhfb, confsponsor = "San Diego Supercomput. Center; ACM", keywords = "SGML; Virtual reality --- Congresses; VRML (Computer program language) --- Congresses", } @Proceedings{Narashimhan:1995:IIF, editor = "V. L. Narashimhan", booktitle = "{ICAPP 95. IEEE First International Conference on Algorithms and Architectures for Parallel Processing, Brisbane, Australia, 19--21 April, 1995}", title = "{ICAPP 95. IEEE First International Conference on Algorithms and Architectures for Parallel Processing, Brisbane, Australia, 19--21 April, 1995}", publisher = pub-IEEE, address = pub-IEEE:adr, pages = "xvii + 961", year = "1995", ISBN = "0-7803-2018-2 (paperback), 0-7803-2019-0 (microfiche)", ISBN-13 = "978-0-7803-2018-5 (paperback), 978-0-7803-2019-2 (microfiche)", LCCN = "QA76.6.I15 1995", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "Two volumes. IEEE catalog no. 95TH0682-5.", acknowledgement = ack-nhfb, confsponsor = "Parallel Algorithms, Archit. and Software Eng. Res. Lab.; IEEE; IEEE Comput. Soc.; ACM; Euromicro; IBM; Instn. Eng. Australia; Inst. Radio and Electron. Eng. Soc.; Australian Comput. 
Soc",
}

@Proceedings{Pahl:1995:CCB,
  editor = "Peter Jan Pahl and Heinrich Werner",
  booktitle = "{Computing in civil and building engineering: 6th International conference --- July 1995, Berlin}",
  title = "{Computing in civil and building engineering: 6th International conference --- July 1995, Berlin}",
  publisher = "A. A. Balkema",
  address = "Brookfield, VT, USA",
  pages = "xxiv + 1641",
  year = "1995",
  ISBN = "90-5410-556-9, 90-5410-557-7",
  ISBN-13 = "978-90-5410-556-5, 978-90-5410-557-2",
  LCCN = "TA345 .I565 1995 v.1-2",
  bibdate = "Thu Feb 29 17:59:11 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "Two volumes.",
  series = "Computing in Civil and Building Engineering 6th",
  acknowledgement = ack-nhfb,
  sponsor = "Arbeitskreis Bauinformatik Technologie-Vermittlungs-Agentur Berlin e.V..",
}

@Proceedings{Pingali:1995:LCP,
  editor = "K. Pingali and U. Banerjee and D. Gelernter and A. Nicolau and D. Padua",
  booktitle = "{Languages and compilers for parallel computing: 7th International Workshop, Ithaca, NY, USA, August 8--10, 1994: proceedings}",
  title = "{Languages and compilers for parallel computing: 7th International Workshop, Ithaca, NY, USA, August 8--10, 1994: proceedings}",
  volume = "892",
  publisher = pub-SV,
  address = pub-SV:adr,
  pages = "xl + 496",
  year = "1995",
  ISBN = "3-540-58868-X",
  ISBN-13 = "978-3-540-58868-9",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  LCCN = "QA76.58 .W656 1994",
  bibdate = "Sun Dec 22 10:20:45 MST 1996",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  series = ser-LNCS,
  acknowledgement = ack-nhfb,
  pubcountry = "Germany",
}

@Proceedings{Prasanna:1995:FIP,
  editor = "Viktor K. Prasanna and V. P. Bhatkar and L. M. Patnaik and S. K. Tripathi",
  booktitle = "{First IWPP parallel processing: proceedings of the First International Workshop on Parallel Processing (IWPP-94): December 26--31, 1994, Bangalore, India}",
  title = "{First IWPP parallel processing: proceedings of the First International Workshop on Parallel Processing (IWPP-94): December 26--31, 1994, Bangalore, India}",
  publisher = "Tata McGraw-Hill Pub. Co",
  address = "New Delhi; New York",
  pages = "xxiii + 736",
  year = "1995",
  ISBN = "0-07-462332-X",
  ISBN-13 = "978-0-07-462332-9",
  LCCN = "QA 76.58 I587 1994",
  bibdate = "Wed Apr 16 14:07:03 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}

@Proceedings{Satofuka:1995:PCF,
  editor = "N. Satofuka and Jacques P{\'e}riaux and Akin Ecer",
  booktitle = "{Parallel computational fluid dynamics: new algorithms and applications: proceedings of the Parallel CFD '94 Conference, Kyoto, Japan, 16--19 May 1994}",
  title = "{Parallel computational fluid dynamics: new algorithms and applications: proceedings of the Parallel CFD '94 Conference, Kyoto, Japan, 16--19 May 1994}",
  publisher = pub-ELS,
  address = pub-ELS:adr,
  pages = "xi + 457",
  year = "1995",
  ISBN = "0-444-82317-4",
  ISBN-13 = "978-0-444-82317-5",
  LCCN = "QA911 .P35 1994",
  bibdate = "Wed Apr 16 07:34:31 MDT 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  keywords = "fluid dynamics -- data processing -- congresses; parallel processing (electronic computers) -- congresses; supercomputers -- congresses",
}

@Proceedings{Shaw:1995:ADA,
  editor = "R. A. (Richard A.) Shaw and H. E. (Harry E.) Payne and J. J. E. (Jeffrey J. E.)
Hayes", booktitle = "{Astronomical data analysis software and systems IV: meeting held at Baltimore, Maryland, 25--28 September 1994}", title = "{Astronomical data analysis software and systems IV: meeting held at Baltimore, Maryland, 25--28 September 1994}", volume = "77", publisher = "Astronomical Society of the Pacific", address = "San Francisco, CA, USA", pages = "xxxvi + 533", year = "1995", ISBN = "0-937707-96-1", ISBN-13 = "978-0-937707-96-8", ISSN = "1080-7926", LCCN = "QB51.3.E43 A87 1994", bibdate = "Thu Feb 29 17:59:11 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = "Astronomical Society of the Pacific Conference Series", acknowledgement = ack-nhfb, sponsor = "Astronomical Society of the Pacific.", } @Proceedings{Tentner:1995:HPC, editor = "A. Tentner", booktitle = "{High Performance Computing Symposium 1995 `Grand Challenges in Computer Simulation'. Proceedings of the 1995 Simulation Multiconference: Phoenix, AZ, USA, 9--13 April 1995}", title = "{High Performance Computing Symposium 1995 `Grand Challenges in Computer Simulation'. Proceedings of the 1995 Simulation Multiconference: Phoenix, AZ, USA, 9--13 April 1995}", publisher = "Society for Computer Simulation", address = "San Diego, CA, USA", pages = "xxiii + 566", year = "1995", ISBN = "1-56555-078-1", ISBN-13 = "978-1-56555-078-0", LCCN = "????", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, corpsource = "Oak Ridge Nat. Lab., TN, USA", sponsororg = "SCS", treatment = "P Practical", } @Proceedings{Uselton:1995:PRS, editor = "Samuel P. Uselton and Michael Brian Cox and Craig M. 
Wittenbrink", booktitle = "{1995 Parallel Rendering Symposium (PRS 95): Atlanta, Georgia, October 30--31, 1995}", title = "{1995 Parallel Rendering Symposium (PRS 95): Atlanta, Georgia, October 30--31, 1995}", publisher = pub-ACM, address = pub-ACM:adr, pages = "107", year = "1995", ISBN = "0-89791-774-1 (softbound) [invalid checksum], 0-7803-3120-6 (microfiche)", ISBN-13 = "978-0-89791-774-2 (softbound), 978-0-7803-3120-4 (microfiche)", LCCN = "QA76.58.P3778 1995", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "ACM order number 428957. IEEE Computer Society Press order number 95TB8134.", acknowledgement = ack-nhfb, confsponsor = "IEEE Comput. Soc. Techn. Committee on Comput. Graphics; ACM SIGGRAPH", } @Proceedings{USENIX:1995:PUT, editor = "{USENIX}", booktitle = "{Proceedings of the 1995 USENIX Technical Conference, January 16--20, 1995, New Orleans, Louisiana, USA}", title = "{Proceedings of the 1995 USENIX Technical Conference, January 16--20, 1995, New Orleans, Louisiana, USA}", publisher = pub-USENIX, address = pub-USENIX:adr, pages = "325", year = "1995", ISBN = "1-880446-67-7", ISBN-13 = "978-1-880446-67-6", LCCN = "QA 76.76 O63 U88 1995", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Proceedings{Vandoni:1995:CSC, editor = "C. E. Vandoni and C. 
Verkerk", booktitle = "{1994 CERN School of Computing: Sopron, Hungary, 28 August--10 September 1994: proceedings}", title = "{1994 CERN School of Computing: Sopron, Hungary, 28 August--10 September 1994: proceedings}", publisher = "CERN", address = "Geneva, Switzerland", pages = "ix + 336", year = "1995", ISBN = "92-9083-069-7", ISBN-13 = "978-92-9083-069-6", bibdate = "Sun Dec 22 10:20:45 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "CERN report 95-01.", acknowledgement = ack-nhfb, pubcountry = "Switzerland", } @Proceedings{VanKatwijk:1995:AAC, editor = "Jan {Van Katwijk}", booktitle = "{ACSCI '95: 1st Annual conference --- May 1995, Heijen, The Netherlands}", title = "{ACSCI '95: 1st Annual conference --- May 1995, Heijen, The Netherlands}", publisher = "ASCI", address = "Delft, The Netherlands", pages = "xi + 450", year = "1995", ISBN = "90-90-08344-8", ISBN-13 = "978-90-90-08344-5", LCCN = "QA75.5 .A38x 1995", bibdate = "Thu Feb 29 17:59:11 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = "Proceedings of the Annual Conference --- Advanced School for Computing and Imaging, 1st", acknowledgement = ack-nhfb, sponsor = "Advanced School for Computing and Imaging", } @Proceedings{Abrahart:1996:GIC, editor = "R. J. Abrahart", booktitle = "{GeoComputation 96. 1st International Conference on GeoComputation: Leeds, UK, 17--19 September 1996}", title = "{GeoComputation 96. 
1st International Conference on GeoComputation: Leeds, UK, 17--19 September 1996}",
  publisher = "????",
  address = "????",
  pages = "????",
  year = "1996",
  ISBN = "????",
  ISBN-13 = "????",
  LCCN = "????",
  bibdate = "Wed Apr 16 14:19:17 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}

@Proceedings{ACM:1996:FCP,
  editor = "{ACM}",
  booktitle = "{FCRC '96: Conference proceedings of the 1996 International Conference on Supercomputing: Philadelphia, Pennsylvania, USA, May 25--28, 1996}",
  title = "{FCRC '96: Conference proceedings of the 1996 International Conference on Supercomputing: Philadelphia, Pennsylvania, USA, May 25--28, 1996}",
  publisher = pub-ACM,
  address = pub-ACM:adr,
  pages = "xii + 406",
  year = "1996",
  ISBN = "0-89791-803-7",
  ISBN-13 = "978-0-89791-803-9",
  LCCN = "QA76.5 I61 1996",
  bibdate = "Wed Mar 18 12:33:29 MST 1998",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "ACM order number 415961.",
  acknowledgement = ack-nhfb,
  keywords = "Supercomputers --- Congresses.",
}

@Proceedings{ACM:1996:SCP,
  editor = "{ACM}",
  booktitle = "{Supercomputing '96 Conference Proceedings: November 17--22, Pittsburgh, PA}",
  title = "{Supercomputing '96 Conference Proceedings: November 17--22, Pittsburgh, PA}",
  publisher = pub-ACM # " and " # pub-IEEE,
  address = pub-ACM:adr # " and " # pub-IEEE:adr,
  pages = "????",
  year = "1996",
  ISBN = "0-89791-854-1",
  ISBN-13 = "978-0-89791-854-1",
  LCCN = "QA 76.88 S8573 1996",
  bibdate = "Tue May 12 08:55:21 1998",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "ACM Order Number: 415962, IEEE Computer Society Press Order Number: RS00126.",
  URL = "http://www.supercomp.org/sc96/proceedings/",
  acknowledgement = ack-nhfb,
}

%%% NOTE(review): the next entry carries the same ISBN as Nadeau:1995:SVR,
%%% so both records appear to describe one volume.  Both keys are kept
%%% because either may be cited; the LCCN below is the one recorded in
%%% the Nadeau:1995:SVR entry.

@Proceedings{ACM:1996:SVR,
  editor = "{ACM}",
  booktitle = "{1995 Symposium on the Virtual Reality Modeling Language (VRML '95)}",
  title = "{1995 Symposium on the Virtual Reality Modeling Language (VRML '95)}",
  publisher = pub-ACM,
  address = pub-ACM:adr,
  pages = "139",
  year = "1996",
  ISBN = "0-89791-818-5",
  ISBN-13 = "978-0-89791-818-3",
  LCCN = "QA76.76.H94 S95 1995",
  bibdate = "Sat Apr 19 16:34:54 MDT 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://www.acm.org/pubs/contents/proceedings/graph/217306/",
  acknowledgement = ack-nhfb,
  conflocation = "San Diego, CA, USA; 14-15 Dec. 1995",
  conftitle = "Proceedings of 1995 VRML Workshop",
  corpsource = "Visual Comput. Lab., California Univ., San Diego, La Jolla, CA, USA",
  sponsororg = "San Diego Supercomput. Center; ACM",
  treatment = "P Practical",
}

@Proceedings{Bode:1996:PVM,
  editor = "Arndt Bode and Jack Dongarra and T. Ludwig and V. Sunderam",
  booktitle = "{Parallel virtual machine, EuroPVM '96: third European PVM conference, Munich, Germany, October 7--9, 1996: proceedings}",
  title = "{Parallel virtual machine, EuroPVM '96: third European PVM conference, Munich, Germany, October 7--9, 1996: proceedings}",
  volume = "1156",
  publisher = pub-SV,
  address = pub-SV:adr,
  pages = "xiv + 362",
  year = "1996",
  ISBN = "3-540-61779-5",
  ISBN-13 = "978-3-540-61779-2",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  LCCN = "QA76.58.E975 1996",
  bibdate = "Sat Apr 19 16:34:54 MDT 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  series = ser-LNCS,
  acknowledgement = ack-nhfb,
  conflocation = "Munich, Germany; 7-9 Oct. 1996",
  conftitle = "Parallel Virtual Machine - EuroPVM '96. Third European PVM Conference. Proceedings",
  corpsource = "Computations and Commun. Res.
Labs., NEC Europe Ltd., Sankt Augustin, Germany",
  keywords = "Parallel computers -- Congresses; Virtual computer systems -- Congresses.",
  pubcountry = "Germany",
  treatment = "P Practical",
}

@Proceedings{Boszormenyi:1996:PCT,
  editor = "L{\'a}szl{\'o} B{\"o}sz{\"o}rm{\'e}nyi",
  booktitle = "{Parallel computation: Third International ACPC Conference with special emphasis on parallel databases and parallel I/O, Klagenfurt, Austria, September 23--25, 1996: proceedings}",
  title = "{Parallel computation: Third International ACPC Conference with special emphasis on parallel databases and parallel I/O, Klagenfurt, Austria, September 23--25, 1996: proceedings}",
  volume = "1127",
  publisher = pub-SV,
  address = pub-SV:adr,
  pages = "xi + 234",
  year = "1996",
  ISBN = "3-540-61695-0",
  ISBN-13 = "978-3-540-61695-5",
  ISSN = "0302-9743 (print), 1611-3349 (electronic)",
  LCCN = "QA267.A1 L43 no.1127",
  bibdate = "Wed Apr 16 07:34:31 MDT 1997",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  series = ser-LNCS,
  acknowledgement = ack-nhfb,
  keywords = "parallel processing (electronic computers) -- congresses",
}

@Proceedings{Bouge:1996:EPP,
  editor = "Luc Boug{\'e} and P. Fraigniaud and A. Mignotte and Y. Robert",
  booktitle = "{Euro-Par '96 parallel processing: second International Euro-Par Conference, Lyon, France, August 26--29, 1996: proceedings}",
  title = "{Euro-Par '96 parallel processing: second International Euro-Par Conference, Lyon, France, August 26--29, 1996: proceedings}",
  volume = "1123--1124",
  publisher = pub-SV,
  address = pub-SV:adr,
  pages = "xxxiii + 842 (vol. 1), 926 (vol. 2)",
  year = "1996",
  ISBN = "3-540-61626-8 (vol. 1), 3-540-61627-6 (vol. 2)",
  ISBN-13 = "978-3-540-61626-9 (vol. 1), 978-3-540-61627-6 (vol.
2)", ISSN = "0302-9743 (print), 1611-3349 (electronic)", LCCN = "QA76.58.I554 1996, QA267.A1 L43 no.1123-1124", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "Two volumes.", series = "Lecture notes in computer science", acknowledgement = ack-nhfb, conflocation = "Lyon, France; 26-29 Aug. 1996", conftitle = "Proceedings of European Conference on Parallel Processing EURO-PAR '96", corpsource = "Oak Ridge Nat. Lab., TN, USA", keywords = "parallel processing (electronic computers) -- congresses", pubcountry = "Germany", treatment = "P Practical", } @Proceedings{Ciancarini:1996:CLM, editor = "Paolo Ciancarini and Chris Hankin", booktitle = "{Coordination languages and models: First International Conference COORDINATION '96, Cesena, Italy, April 15--17, 1996: proceedings}", title = "{Coordination languages and models: First International Conference COORDINATION '96, Cesena, Italy, April 15--17, 1996: proceedings}", number = "1061", publisher = pub-SV, address = pub-SV:adr, pages = "xi + 443", year = "1996", ISBN = "3-540-61052-9", ISBN-13 = "978-3-540-61052-6", ISSN = "0302-9743 (print), 1611-3349 (electronic)", LCCN = "QA76.58.I52 1996", bibdate = "Wed Aug 14 09:02:28 MDT 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = ser-LNCS, acknowledgement = ack-nhfb, } @Proceedings{Dongarra:1996:APC, editor = "Jack J. 
Dongarra and Kay Madsen and Jerzy Wasniewski", booktitle = "{Applied parallel computing: computations in physics, chemistry, and engineering science: second international workshop, PARA '95, Lyngby, Denmark, August 21--24, 1995: proceedings}", title = "{Applied parallel computing: computations in physics, chemistry, and engineering science: second international workshop, PARA '95, Lyngby, Denmark, August 21--24, 1995: proceedings}", volume = "1041", publisher = pub-SV, address = pub-SV:adr, pages = "562", year = "1996", ISBN = "3-540-60902-4", ISBN-13 = "978-3-540-60902-5", ISSN = "0302-9743 (print), 1611-3349 (electronic)", LCCN = "QA76.58.P35 1995", bibdate = "Wed Aug 14 10:49:23 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = ser-LNCS, acknowledgement = ack-nhfb, sponsor = "Danish Computing Centre for Research and Education Technical University of Denmark. Institute of Mathematical Modeling Danish Natural Science Research Council.", } @Proceedings{El-Rewini:1996:PTN, editor = "Hesham El-Rewini and Bruce D. 
Shriver", booktitle = "{Proceedings of the Twenty-Ninth Hawaii International Conference on System Sciences (HICSS-29): Wailea, HI, USA, 3--6 January 1996}", title = "{Proceedings of the Twenty-Ninth Hawaii International Conference on System Sciences (HICSS-29): Wailea, HI, USA, 3--6 January 1996}", publisher = pub-IEEE, address = pub-IEEE:adr, pages = "various", year = "1996", ISBN = "0-8186-7324-9", ISBN-13 = "978-0-8186-7324-5", ISSN = "1060-3425", LCCN = "????", bibdate = "Wed Apr 16 14:12:08 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "Five volumes.", acknowledgement = ack-nhfb, } @Proceedings{Grangeat:1996:PTI, editor = "Pierre Grangeat and Jean-Louis Amans", booktitle = "{Proceedings of the Third International Meeting on Fully Three-Dimensional Image Reconstruction in Radiology and Nuclear Medicine, held July 4--6, 1995 at Domaine d'Aix-Marlioz, Aix-les-Bains, France}", title = "{Proceedings of the Third International Meeting on Fully Three-Dimensional Image Reconstruction in Radiology and Nuclear Medicine, held July 4--6, 1995 at Domaine d'Aix-Marlioz, Aix-les-Bains, France}", publisher = pub-KLUWER, address = pub-KLUWER:adr, pages = "x + 315", year = "1996", ISBN = "0-7923-4129-5", ISBN-13 = "978-0-7923-4129-1", LCCN = "R857.T47 T485 1996", bibdate = "Wed Apr 16 10:20:43 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Proceedings{Grinstein:1996:VDE, editor = "Georges G. Grinstein and Robert F. 
Erbacher",
  booktitle =    "{Visual data exploration and analysis III: 31
                 January--2 February, 1996, San Jose, California}",
  title =        "{Visual data exploration and analysis III: 31
                 January--2 February, 1996, San Jose, California}",
  volume =       "2656",
  publisher =    pub-SPIE,
  address =      pub-SPIE:adr,
  pages =        "ix + 404",
  year =         "1996",
  CODEN =        "PSISDG",
  ISBN =         "0-8194-2030-1",
  ISBN-13 =      "978-0-8194-2030-5",
  ISSN =         "0277-786X (print), 1996-756X (electronic)",
  LCCN =         "TS510.S63 v.2656",
  bibdate =      "Sun Dec 22 10:19:23 MST 1996",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  series =       j-PROC-SPIE,
  acknowledgement = ack-nhfb,
  confsponsor =  "SPIE; Soc. Imaging Sci. and Technol",
  xxvolume =     "2421 (or 2656??)",
}

@Proceedings{IEEE:1996:EIS,
  editor =       "{IEEE}",
  booktitle =    "{Eighth IEEE Symposium on Parallel and Distributed
                 Processing: October 23--26, 1996, New Orleans,
                 Louisiana}",
  title =        "{Eighth IEEE Symposium on Parallel and Distributed
                 Processing: October 23--26, 1996, New Orleans,
                 Louisiana}",
  publisher =    pub-IEEE,
  address =      pub-IEEE:adr,
  pages =        "xv + 618",
  year =         "1996",
  ISBN =         "0-8186-7683-3, 0-8186-7685-X (microfiche)",
  ISBN-13 =      "978-0-8186-7683-3, 978-0-8186-7685-7 (microfiche)",
  LCCN =         "QA76.58 .I42 1996",
  bibdate =      "Wed Apr 16 07:34:31 MDT 1997",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note =         "IEEE Computer Society Press order number PR07683.
IEEE Order Plan catalog number 96TB100088.", acknowledgement = ack-nhfb, keywords = "electronic data processing -- distributed processing -- congresses; parallel processing (electronic computers) -- congresses", } @Proceedings{IEEE:1996:FSS, editor = "{IEEE}", booktitle = "{Frontiers'96, the Sixth Symposium on the Frontiers of Massively Parallel Computation: October 27--31, 1996, Annapolis, Maryland: proceedings}", title = "{Frontiers'96, the Sixth Symposium on the Frontiers of Massively Parallel Computation: October 27--31, 1996, Annapolis, Maryland: proceedings}", publisher = pub-IEEE, address = pub-IEEE:adr, pages = "xiv + 372", year = "1996", ISBN = "0-8186-7551-9", ISBN-13 = "978-0-8186-7551-5", LCCN = "QA76.58 .S95 1996", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "IEEE catalog number 96TB100062.", acknowledgement = ack-nhfb, corpsource = "Numerical Aerodynamic Simulation, NASA Ames Res. Center, Moffett Field, CA, USA; Centro Svizzero di Calcolo Sci., Manno, Switzerland", sponsororg = "IEEE Comput. Soc.; NASA Goddard Space Flight Center; URSA/CESDIS", treatment = "P Practical", } @Proceedings{IEEE:1996:ICH, editor = "{IEEE}", booktitle = "{3rd International Conference on High Performance Computing: proceedings, December 19--22, 1996, Trivandrum, India}", title = "{3rd International Conference on High Performance Computing: proceedings, December 19--22, 1996, Trivandrum, India}", publisher = pub-IEEE, address = pub-IEEE:adr, pages = "xvi + 476", year = "1996", ISBN = "0-8186-7557-8", ISBN-13 = "978-0-8186-7557-7", LCCN = "QA76.88.I575 1996", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "IEEE catalog number 96TB100074.", acknowledgement = ack-nhfb, conflocation = "Trivandrum, India; 19-22 Dec. 1996", conftitle = "Proceedings of 3rd International Conference on High Performance Computing (HiPC)", corpsource = "Software Technol. 
Group, Swiss Center for Sci. Comput., Manno, Switzerland; Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL, USA", sponsororg = "IEEE Comput. Soc.; IEEE Comput. Soc. Tech. Committee on Parallel Process.; ACM SIGARCH", treatment = "P Practical", } @Proceedings{IEEE:1996:PFE, editor = "{IEEE}", booktitle = "{Proceedings of the fourth Euromicro Workshop on Parallel and Distributed Processing (PDP '96): January 24--26, 1996, Braga, Portugal}", title = "{Proceedings of the fourth Euromicro Workshop on Parallel and Distributed Processing (PDP '96): January 24--26, 1996, Braga, Portugal}", publisher = pub-IEEE, address = pub-IEEE:adr, pages = "xv + 551", year = "1996", ISBN = "0-8186-7376-1", ISBN-13 = "978-0-8186-7376-4", LCCN = "QA76.58 .E97 1996", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "IEEE order number PR07376.", acknowledgement = ack-nhfb, conflocation = "Braga, Portugal; 24-26 Jan. 1996", conftitle = "Proceedings of 4th Euromicro Workshop on Parallel and Distributed Processing", corpsource = "Coimbra Univ., Portugal", keywords = "electronic data processing -- distributed processing -- congresses; parallel processing (electronic computers) -- congresses; parallel programming (computer science) -- congresses", treatment = "P Practical", } @Proceedings{IEEE:1996:PFI, editor = "{IEEE}", booktitle = "{Proceedings of the Fifth IEEE International Symposium on High Performance Distributed Computing, Syracuse, NY, USA, 6--9 August 1996}", title = "{Proceedings of the Fifth IEEE International Symposium on High Performance Distributed Computing, Syracuse, NY, USA, 6--9 August 1996}", publisher = pub-IEEE, address = pub-IEEE:adr, pages = "xviii + 642", year = "1996", ISBN = "0-8186-7582-9", ISBN-13 = "978-0-8186-7582-9", LCCN = "QA 76.88 I52 1996", bibdate = "Tue May 12 08:55:41 1998", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "IEEE catalog number TB100069.", acknowledgement = 
ack-nhfb, corpsource = "NSF Eng. Res. Center for Comput. Field Simulation, Mississippi State Univ., MS, USA", sponsororg = "IEEE Comput. Soc. Tech. Committee on Distributed Process.; Northeast Parallel Architectures Center; New York State Center for Adv. Technol. Comput. Applications and Software Eng. (CASE Center) at Syracuse Univ.; Rome Lab", treatment = "P Practical", } @Proceedings{IEEE:1996:PII, editor = "{IEEE}", booktitle = "{Proceedings of IPPS '96. The 10th International Parallel Processing Symposium: Honolulu, HI, USA, 15--19 April 1996}", title = "{Proceedings of IPPS '96. The 10th International Parallel Processing Symposium: Honolulu, HI, USA, 15--19 April 1996}", publisher = pub-IEEE, address = pub-IEEE:adr, pages = "xxviii + 903", year = "1996", ISBN = "0-8186-7255-2", ISBN-13 = "978-0-8186-7255-2", LCCN = "QA76.58 .I565 1996", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "IEEE catalog number 96TB100038. IEEE Computer Society Press order number PR07255.", acknowledgement = ack-nhfb, conflocation = "Honolulu, HI, USA; 15-19 April 1996", conftitle = "Proceedings of International Conference on Parallel Processing", corpsource = "Mississippi State Univ., MS, USA; Inst. fur Inf., Tech. Univ. Munchen, Germany", sponsororg = "IEEE Comput. Tech. 
Committee on Parallel Process.; ACM SIGARCH", treatment = "P Practical", } @Proceedings{IEEE:1996:PIS, editor = "{IEEE}", booktitle = "{Proceedings of 1996 IEEE Second International Conference on Algorithms and Architectures for Parallel Processing, ICA PP '96: June 11--13, 1996, Singapore}", title = "{Proceedings of 1996 IEEE Second International Conference on Algorithms and Architectures for Parallel Processing, ICA PP '96: June 11--13, 1996, Singapore}", publisher = pub-IEEE, address = pub-IEEE:adr, pages = "xix + 547", year = "1996", ISBN = "0-7803-3529-5 (softbound), 0-7803-3530-9 (microfiche)", ISBN-13 = "978-0-7803-3529-5 (softbound), 978-0-7803-3530-1 (microfiche)", LCCN = "QA76.58.I33 1996", bibdate = "Wed Apr 16 07:34:31 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "IEEE catalog number 96TH8204.", acknowledgement = ack-nhfb, keywords = "electronic digital computers -- programming -- congresses; multiprocessors -- programming -- congresses; parallel processing (electronic computers) -- congresses", } @Proceedings{IEEE:1996:PSI, editor = "{IEEE}", booktitle = "{Proceedings of the Seventh Israeli Conference on Computer Systems and Software Engineering: June 12--13, 1996, Herzliya, Israel}", title = "{Proceedings of the Seventh Israeli Conference on Computer Systems and Software Engineering: June 12--13, 1996, Herzliya, Israel}", publisher = pub-IEEE, address = pub-IEEE:adr, pages = "viii + 151", year = "1996", ISBN = "0-8186-7536-5", ISBN-13 = "978-0-8186-7536-2", LCCN = "QA75.5 .I75 1996", bibdate = "Wed Apr 16 07:34:31 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "IEEE Computer Society Press Order Number PR07536.", acknowledgement = ack-nhfb, keywords = "software engineering -- Israel -- congresses; system design -- congresses", } @Proceedings{IEEE:1996:PSM, editor = "{IEEE}", booktitle = "{Proceedings. 
Second MPI Developer's Conference: Notre Dame, IN, USA, 1--2 July 1996}", title = "{Proceedings. Second MPI Developer's Conference: Notre Dame, IN, USA, 1--2 July 1996}", publisher = pub-IEEE, address = pub-IEEE:adr, pages = "ix + 207", year = "1996", ISBN = "0-8186-7533-0", ISBN-13 = "978-0-8186-7533-1", LCCN = "QA76.642 .M67 1996", bibdate = "Tue May 12 08:56:04 1998", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, sponsororg = "IEEE Comput. Soc. Tech. Committee on Distributed Process", } @Proceedings{Jacoby:1996:ADA, editor = "G. H. (George H.) Jacoby and Jeannette V. Barnes", booktitle = "{Astronomical data analysis software and systems V: meeting held at Tucson, Arizona, 23--25 October 1995}", title = "{Astronomical data analysis software and systems V: meeting held at Tucson, Arizona, 23--25 October 1995}", volume = "101", publisher = "Astronomical Society of the Pacific", address = "San Francisco, CA, USA", pages = "xxxvii + 607", year = "1996", ISBN = "????", ISBN-13 = "????", ISSN = "1080-7926", LCCN = "QB51.3.E43 A87 1995", bibdate = "Wed Apr 16 14:14:55 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = "Astronomical Society of the Pacific Conference Series", acknowledgement = ack-nhfb, } @Proceedings{Jain:1996:IOP, editor = "Ravi Jain and John Werth and James C. Browne", booktitle = "{Input\slash output and parallel and distributed computer systems}", title = "{Input\slash output and parallel and distributed computer systems}", publisher = pub-KLUWER, address = pub-KLUWER:adr, pages = "xiv + 395", year = "1996", ISBN = "0-7923-9735-5", ISBN-13 = "978-0-7923-9735-9", LCCN = "QA76.58.I485 1996", bibdate = "Mon Apr 21 11:26:01 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Proceedings{Li:1996:PSI, editor = "G.-J. Li and D. F. Hsu and S. Horiguchi and B. Maggs", booktitle = "{Proceedings. 
Second International Symposium on Parallel Architectures,
                 Algorithms, and Networks (I-SPAN '96): June 12--14,
                 1996, Beijing, China}",
  title =        "{Proceedings. Second International Symposium on
                 Parallel Architectures, Algorithms, and Networks
                 (I-SPAN '96): June 12--14, 1996, Beijing, China}",
  publisher =    pub-IEEE,
  address =      pub-IEEE:adr,
  pages =        "xiii + 567",
  year =         "1996",
  ISBN =         "0-8186-7460-1",
  ISBN-13 =      "978-0-8186-7460-0",
  LCCN =         "QA76.58.I5673 1996",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note =         "IEEE catalog number 96TB100044.",
  acknowledgement = ack-nhfb,
  corpsource =   "Dept. of Comput. Sci., Australian Nat. Univ.,
                 Canberra, ACT, Australia",
  sponsororg =   "Chinese Nat. Res. Center for Intelligent Comput.
                 Syst.; IEEE Comput. Soc.; IEEE Comput. Soc. Tech.
                 Committee on Parallel Process.; Steering Committee of
                 the Chinese Nat. Hi-Tech Programme; Inf. Process. Soc.
                 Japan; Chinese Comput. Federation; IEICE Inf. and
                 Syst. Soc",
  treatment =    "P Practical",
}

@Proceedings{Li:1996:SIS,
  editor =       "Guo-Jie Li",
  booktitle =    "{Second International Symposium on Parallel
                 Architectures, Algorithms, and Networks (I-SPAN '96):
                 proceedings, June 12--14, 1996, Beijing, China}",
  title =        "{Second International Symposium on Parallel
                 Architectures, Algorithms, and Networks (I-SPAN '96):
                 proceedings, June 12--14, 1996, Beijing, China}",
  publisher =    pub-IEEE,
  address =      pub-IEEE:adr,
  pages =        "xi + 567",
  year =         "1996",
  ISBN =         "0-8186-7460-1",
  ISBN-13 =      "978-0-8186-7460-0",
  LCCN =         "QA76.58.I565 1996",
  bibdate =      "Sat Oct 21 15:20:00 2000",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note =         "IEEE catalog number 94TH0697-3.",
  acknowledgement = ack-nhfb,
  keywords =     "computer algorithms -- congresses; computer
                 architecture -- congresses; computer networks --
                 congresses; parallel processing (electronic computers)
                 -- congresses",
}

@Proceedings{Liddell:1996:HPC,
  editor =       "Heather Mary Liddell and A. Colbrook and B.
Hertzberger and P. Sloot",
  booktitle =    "{High-performance computing and networking:
                 international conference and exhibition, HPCN EUROPE
                 1996, Brussels, Belgium, April 15--19, 1996:
                 proceedings}",
  title =        "{High-performance computing and networking:
                 international conference and exhibition, HPCN EUROPE
                 1996, Brussels, Belgium, April 15--19, 1996:
                 proceedings}",
  volume =       "1067",
  publisher =    pub-SV,
  address =      pub-SV:adr,
  pages =        "xxv + 1040",
  year =         "1996",
  ISBN =         "3-540-61142-8 (paperback)",
  ISBN-13 =      "978-3-540-61142-4 (paperback)",
  LCCN =         "QA76.88 .H52 1996",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  series =       "Lecture notes in computer science",
  acknowledgement = ack-nhfb,
  conflocation = "Brussels, Belgium; 15-19 April 1996",
  conftitle =    "High-Performance Computing and Networking.
                 International Conference and Exhibition HPCN Europe
                 1996",
  corpsource =   "Zentrum fur Paralleles Rechnen, Koln Univ., Germany;
                 German Nat. Res. Center for Inf. Technol., St.
                 Augustin, Germany; Dept. of Electron. and Comput. Sci.,
                 Southampton Univ., UK; Dept. of Inf., Basel Univ.,
                 Switzerland",
  keywords =     "computer networks -- congresses; supercomputers --
                 congresses",
  pubcountry =   "Germany",
  treatment =    "T Theoretical or Mathematical; P Practical",
}

@Proceedings{Reeves:1996:PIC,
  editor =       "A. Reeves",
  booktitle =    "{Proceedings of the 1996 International Conference on
                 Challenges for Parallel Processing, Ithaca, NY, USA,
                 August 12, 1996}",
  title =        "{Proceedings of the 1996 International Conference on
                 Challenges for Parallel Processing, Ithaca, NY, USA,
                 August 12, 1996}",
  volume =       "1",
  publisher =    pub-IEEE,
  address =      pub-IEEE:adr,
  pages =        "xvi + 278 (vol. 1), xv + 173 (vol. 2), 230 (vol.
3)", year = "1996", ISBN = "0-8186-7623-X", ISBN-13 = "978-0-8186-7623-9", LCCN = "QA76.58 .I34 1996", bibdate = "Sat Apr 19 16:34:54 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "Three volumes.", acknowledgement = ack-nhfb, conftitle = "Proceedings of 25th International Conference on Parallel Processing", corpsource = "Comput. Sci. Div., Berkeley Univ., CA, USA", sponsororg = "Int. Assoc. Comput. and Commun.; Pennsylvania State Univ", treatment = "P Practical", xxeditor = "Howard Jay Segal", } @Proceedings{Silvester:1996:SEE, editor = "P. P. Silvester", booktitle = "{Software for electrical engineering analysis and design: Third International Conference on Software for Electrical Engineering Analysis and Design, Electrosoft '96, Pisa, Italy}", title = "{Software for electrical engineering analysis and design: Third International Conference on Software for Electrical Engineering Analysis and Design, Electrosoft '96, Pisa, Italy}", publisher = "Computational Mechanics Publications", address = "Boston, MA, USA", pages = "509", year = "1996", ISBN = "1-85312-395-1", ISBN-13 = "978-1-85312-395-5", LCCN = "TK5.I59 1996", bibdate = "Wed Apr 16 07:34:31 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, keywords = "electric engineering -- computer programs -- congresses", } @Proceedings{Szymanski:1996:LCR, editor = "Boleslaw K. 
Szymanski and Balaram Sinharoy", booktitle = "{Languages, Compilers and Run-Time Systems for Scalable Computers, 22--24 May 1995, Troy, NY, USA}", title = "{Languages, Compilers and Run-Time Systems for Scalable Computers, 22--24 May 1995, Troy, NY, USA}", publisher = pub-KLUWER, address = pub-KLUWER:adr, pages = "xiv + 335", year = "1996", ISBN = "0-7923-9635-9", ISBN-13 = "978-0-7923-9635-2", LCCN = "QA76.58.L37 1996", bibdate = "Sun Dec 22 10:19:23 MST 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Proceedings{Toussaint:1996:AES, editor = "Marcel Toussaint", booktitle = "{Ada in Europe: Second International Eurospace-Ada-Europe Symposium, Frankfurt\slash Main, Germany, October 2--6, 1995: proceedings}", title = "{Ada in Europe: Second International Eurospace-Ada-Europe Symposium, Frankfurt\slash Main, Germany, October 2--6, 1995: proceedings}", number = "1031", publisher = pub-SV, address = pub-SV:adr, pages = "xi + 455", year = "1996", ISBN = "3-540-60757-9", ISBN-13 = "978-3-540-60757-1", ISSN = "0302-9743 (print), 1611-3349 (electronic)", LCCN = "QA76.73.A35I57 1995", bibdate = "Wed Aug 14 09:02:28 MDT 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = ser-LNCS, acknowledgement = ack-nhfb, sponsor = "Eurospace Ada-Europe.", } @Proceedings{Wasniewski:1996:APC, editor = "Jerzy Wasniewski", booktitle = "{Applied parallel computing: industrial computation and optimization: Third International Workshop, PARA '96, Lyngby, Denmark, August 18--21, 1996: proceedings}", title = "{Applied parallel computing: industrial computation and optimization: Third International Workshop, PARA '96, Lyngby, Denmark, August 18--21, 1996: proceedings}", volume = "1184", publisher = pub-SV, address = pub-SV:adr, pages = "xiii + 722", year = "1996", ISBN = "3-540-62095-8", ISBN-13 = "978-3-540-62095-2", LCCN = "QA76.58 .P35 1996", bibdate = "Wed Apr 16 07:34:31 MDT 1997", bibsource = 
"https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = "Lecture notes in computer science", acknowledgement = ack-nhfb, keywords = "parallel processing (electronic computers) -- congresses", } @Proceedings{Yetongnon:1996:PII, editor = "K. Yetongnon and S. Hariri", booktitle = "{Proceedings of the ISCA International Conference. Parallel and Distributed Computing Systems: Dijon, France, 25--27 September 1996 (PDCS '96: 9th)}", title = "{Proceedings of the ISCA International Conference. Parallel and Distributed Computing Systems: Dijon, France, 25--27 September 1996 (PDCS '96: 9th)}", publisher = pub-IEEE, address = pub-IEEE:adr, pages = "????", year = "1996", ISBN = "????", ISBN-13 = "????", LCCN = "????", bibdate = "Wed Apr 16 14:20:56 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Proceedings{Zaky:1996:PDT, editor = "Amr Zaky and Ted Lewis", booktitle = "Tools and environments for parallel and distributed systems", title = "{Program development tools and environments for parallel and distributed systems: Session; 28th Hawaii international conference on system sciences --- 1995}", volume = "2", publisher = pub-KLUWER, address = pub-KLUWER:adr, pages = "viii + 305", year = "1996", ISBN = "0-7923-9675-8", ISBN-13 = "978-0-7923-9675-8", LCCN = "QA76.58.T65 1996", bibdate = "Wed Aug 14 09:02:28 MDT 1996", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = "Kluwer International Series in Software Engineering", acknowledgement = ack-nhfb, } @Proceedings{ACM:1997:PPS, editor = "{ACM}", booktitle = "{PASCO '97. Proceedings of the second international symposium on parallel symbolic computation, July 20--22, 1997, Maui, HI}", title = "{PASCO '97. 
Proceedings of the second international symposium on parallel symbolic computation, July 20--22, 1997, Maui, HI}", publisher = pub-ACM, address = pub-ACM:adr, pages = "????", year = "1997", ISBN = "????", ISBN-13 = "????", LCCN = "????", bibdate = "Thu Mar 12 07:30:53 1998", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, xxnote = "Check editor; proceedings not yet in LC, UC-Melvyl, or OCLC??", } @Proceedings{ACM:1997:SHP, editor = "{ACM}", booktitle = "{SC'97: High Performance Networking and Computing: Proceedings of the 1997 ACM\slash IEEE SC97 Conference: November 15--21, 1997, San Jose, California, USA}", title = "{SC'97: High Performance Networking and Computing: Proceedings of the 1997 ACM\slash IEEE SC97 Conference: November 15--21, 1997, San Jose, California, USA}", publisher = pub-ACM # " and " # pub-IEEE, address = pub-ACM:adr # " and " # pub-IEEE:adr, pages = "vii + 159", year = "1997", ISBN = "0-89791-985-8", ISBN-13 = "978-0-89791-985-2", LCCN = "QA76.9.A25 A265 1997", bibdate = "Sat Mar 21 09:10:00 1998", bibsource = "https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "ACM SIGARCH order number 415972. IEEE Computer Society Press order number RS00160.", URL = "http://www.acm.org/pubs/contents/proceedings/commsec/266741/; http://www.supercomp.org/sc97/proceedings/", acknowledgement = ack-nhfb, xxnote = "Check ISBN: UC/Melvyl has this one for ``Proceedings / Second ACM Workshop on Role-Based Access Control, Fairfax, Virginia, USA, November 6--7, 1997''.", } @Proceedings{Boisvert:1997:QNS, editor = "R. F. 
Boisvert", booktitle = "{Quality of numerical software: assessment and enhancement / proceedings of the IFIP TC2/WG2.5 Working Conference on the Quality of Numerical Software, Assessment and Enhancement, Oxford, United Kingdom, 8--12 July 1996}", title = "{Quality of numerical software: assessment and enhancement / proceedings of the IFIP TC2/WG2.5 Working Conference on the Quality of Numerical Software, Assessment and Enhancement, Oxford, United Kingdom, 8--12 July 1996}", publisher = pub-CHAPMAN-HALL, address = pub-CHAPMAN-HALL:adr, pages = "vii + 384", year = "1997", ISBN = "0-412-80530-8", ISBN-13 = "978-0-412-80530-1", LCCN = "QA297 .I35 1996", bibdate = "Thu Sep 16 09:48:36 MDT 1999", bibsource = "https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, sponsor = "IFIP; Technical Committee 2/Working Group 2.5.", } @Proceedings{Bubak:1997:RAP, editor = "Marian Bubak and J. J. Dongarra and Jerzy Wasniewski", booktitle = "{Recent advances in parallel virtual machine and message passing interface: 4th European PVM\slash MPI user's group meeting Cracow, Poland, November 3--5, 1997: proceedings}", title = "{Recent advances in parallel virtual machine and message passing interface: 4th European PVM\slash MPI user's group meeting Cracow, Poland, November 3--5, 1997: proceedings}", volume = "1332", publisher = pub-SV, address = pub-SV:adr, pages = "xv + 518", year = "1997", CODEN = "LNCSD9", ISBN = "3-540-63697-8 (paperback)", ISBN-13 = "978-3-540-63697-7 (paperback)", ISSN = "0302-9743 (print), 1611-3349 (electronic)", LCCN = "QA76.58.E973 1997", bibdate = "Mon Nov 24 09:49:54 MST 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = ser-LNCS, acknowledgement = ack-nhfb, keywords = "Computer networks -- Congresses.; Parallel computers -- Congresses.", } @Proceedings{IEEE:1997:APD, editor = "{IEEE}", booktitle = "{Advances in parallel and distributed 
computing: March 19--21, 1997, Shanghai, China: proceedings}", title = "{Advances in parallel and distributed computing: March 19--21, 1997, Shanghai, China: proceedings}", publisher = pub-IEEE, address = pub-IEEE:adr, pages = "xii + 426", year = "1997", ISBN = "0-8186-7876-3 (paperback and case), 0-8186-7878-X (microfiche)", ISBN-13 = "978-0-8186-7876-9 (paperback and case), 978-0-8186-7878-3 (microfiche)", LCCN = "QA76.58 .A4 1997", bibdate = "Wed Apr 16 07:34:31 MDT 1997", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, keywords = "electronic data processing -- distributed processing -- congresses; parallel processing (electronic computers) -- congresses", } @Proceedings{IEEE:1997:PIP, editor = "{IEEE}", booktitle = "{Proceedings. 11th International Parallel Processing Symposium, April 1--5, 1997, Geneva, Switzerland}", title = "{Proceedings. 11th International Parallel Processing Symposium, April 1--5, 1997, Geneva, Switzerland}", publisher = pub-IEEE, address = pub-IEEE:adr, pages = "xxi + 765", year = "1997", ISBN = "0-8186-7793-7", ISBN-13 = "978-0-8186-7793-9", LCCN = "QA76.58 .I56 1997", bibdate = "Thu May 21 19:02:04 MDT 1998", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "IEEE catalog number 97TB100107. IEEE Computer Society Press order number PR07792", acknowledgement = ack-nhfb, conftitle = "Proceedings 11th International Parallel Processing Symposium", corpsource = "Dept. of Comput. Sci., Utah Univ., Salt Lake City, UT, USA", sponsororg = "IEEE Comput. Soc. Tech. Committee on Parallel Process.; ACM SIGARCH; Eur. Assoc. Theor. Comput. Sci. 
(EATCS); Swiss Special Interest Group on Parallelism (SIPAR); SPEEDUP Soc",
  treatment =    "P Practical",
}

@Proceedings{IEEE:1997:TIS,
  editor =       "{IEEE}",
  booktitle =    "{Third International Symposium on High-Performance
                 Computer Architecture: proceedings, February 1--5,
                 1997, San Antonio, Texas}",
  title =        "{Third International Symposium on High-Performance
                 Computer Architecture: proceedings, February 1--5,
                 1997, San Antonio, Texas}",
  publisher =    pub-IEEE,
  address =      pub-IEEE:adr,
  pages =        "xi + 353",
  year =         "1997",
  ISBN =         "0-8186-7764-3",
  ISBN-13 =      "978-0-8186-7764-9",
  LCCN =         "QA76.9.A73I566 1997",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note =         "IEEE catalog number 97TB100094.",
  acknowledgement = ack-nhfb,
  corpsource =   "Hong Kong Univ., Hong Kong",
  sponsororg =   "IEEE Comput. Soc. Tech. Committee on Comput. Archit",
  treatment =    "P Practical",
}

@Proceedings{ACM:1998:AWJ,
  editor =       "{ACM}",
  booktitle =    "{ACM 1998 Workshop on Java for High-Performance
                 Network Computing}",
  title =        "{ACM 1998 Workshop on Java for High-Performance
                 Network Computing}",
  publisher =    pub-ACM,
  address =      pub-ACM:adr,
  pages =        "????",
  year =         "1998",
  ISBN =         "????",
  ISBN-13 =      "????",
  LCCN =         "????",
  bibdate =      "Thu Apr 27 10:40:59 2000",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note =         "Possibly unpublished, except electronically.",
  URL =          "http://www.cs.ucsb.edu/conferences/java98/program.html",
  acknowledgement = ack-nhfb,
}

@Proceedings{ACM:1998:SHP,
  editor =       "{ACM}",
  booktitle =    "{SC'98: High Performance Networking and Computing:
                 Proceedings of the 1998 ACM\slash IEEE SC98
                 Conference: Orange County Convention Center, Orlando,
                 Florida, USA, November 7--13, 1998}",
  title =        "{SC'98: High Performance Networking and Computing:
                 Proceedings of the 1998 ACM\slash IEEE SC98
                 Conference: Orange County Convention Center, Orlando,
                 Florida, USA, November 7--13, 1998}",
  publisher =    pub-ACM # " and " # pub-IEEE,
  address =      pub-ACM:adr # "
and " # pub-IEEE:adr, pages = "????", year = "1998", ISBN = "????", ISBN-13 = "????", LCCN = "????", bibdate = "Wed Oct 07 08:51:34 1998", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.supercomp.org/sc98/papers/", acknowledgement = ack-nhfb, } @Proceedings{Alexandrov:1998:RAP, editor = "Vassil Alexandrov and J. J. Dongarra", booktitle = "{Recent advances in parallel virtual machine and message passing interface: 5th European PVM\slash MPI User's Group Meeting, Liverpool, UK, September 7--9, 1998: proceedings}", title = "{Recent advances in parallel virtual machine and message passing interface: 5th European PVM\slash MPI User's Group Meeting, Liverpool, UK, September 7--9, 1998: proceedings}", volume = "1497", publisher = pub-SV, address = pub-SV:adr, pages = "xii + 412", year = "1998", ISBN = "3-540-65041-5 (softcover)", ISBN-13 = "978-3-540-65041-6 (softcover)", ISSN = "0302-9743 (print), 1611-3349 (electronic)", LCCN = "QA267.A1 L43 no.1497", bibdate = "Mon May 3 11:00:13 MDT 1999", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "Jointly sponsored by the Computer Science Dept., University of Liverpool and Oak Ridge National Laboratory.", series = ser-LNCS, acknowledgement = ack-nhfb, keywords = "data transmission systems -- congresses; parallel computers -- congresses; virtual computer systems -- congresses", } @Proceedings{ACM:1999:SPO, editor = "{ACM}", booktitle = "{SC'99: Oregon Convention Center 777 NE Martin Luther King Jr. Boulevard, Portland, Oregon, November 11--18, 1999}", title = "{SC'99: Oregon Convention Center 777 NE Martin Luther King Jr. 
Boulevard, Portland, Oregon, November 11--18, 1999}",
  publisher =    pub-ACM # " and " # pub-IEEE,
  address =      pub-ACM:adr # " and " # pub-IEEE:adr,
  pages =        "????",
  year =         "1999",
  ISBN =         "????",
  ISBN-13 =      "????",
  LCCN =         "????",
  bibdate =      "Thu Feb 24 09:35:00 2000",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}

@Proceedings{Dongarra:1999:RAP,
  editor =       "J. J. Dongarra and E. Luque and Tomas Margalef",
  booktitle =    "{Recent advances in parallel virtual machine and
                 message passing interface: 6th European PVM\slash MPI
                 Users' Group Meeting, Barcelona, Spain, September
                 26--29, 1999: proceedings}",
  title =        "{Recent advances in parallel virtual machine and
                 message passing interface: 6th European PVM\slash MPI
                 Users' Group Meeting, Barcelona, Spain, September
                 26--29, 1999: proceedings}",
  volume =       "1697",
  publisher =    pub-SV,
  address =      pub-SV:adr,
  pages =        "xvii + 551",
  year =         "1999",
  ISBN =         "3-540-66549-8 (softcover)",
  ISBN-13 =      "978-3-540-66549-6 (softcover)",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  LCCN =         "QA76.58 E973 1999",
  bibdate =      "Wed Dec 8 06:34:56 MST 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  series =       ser-LNCS,
  acknowledgement = ack-nhfb,
  alttitle =     "PVM/MPI '99",
  keywords =     "Data transmission systems; Parallel computers;
                 Virtual computer systems",
}

@Proceedings{ACM:2000:SHP,
  editor =       "{ACM}",
  booktitle =    "{SC2000: High Performance Networking and Computing.
                 Dallas Convention Center, Dallas, TX, USA, November
                 4--10, 2000}",
  title =        "{SC2000: High Performance Networking and Computing.
Dallas Convention Center, Dallas, TX, USA, November 4--10, 2000}",
  publisher =    pub-ACM # " and " # pub-IEEE,
  address =      pub-ACM:adr # " and " # pub-IEEE:adr,
  pages =        "????",
  year =         "2000",
  ISBN =         "????",
  ISBN-13 =      "????",
  LCCN =         "????",
  bibdate =      "Thu Feb 24 09:35:00 2000",
  bibsource =    "https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sc2000.org/proceedings/info/fp.pdf",
  acknowledgement = ack-nhfb,
}

@Proceedings{Dongarra:2000:RAP,
  editor =       "J. J. Dongarra and Peter Kacsuk and Norbert
                 Podhorszki",
  booktitle =    "{Recent advances in parallel virtual machine and
                 message passing interface: 7th European PVM\slash MPI
                 Users' Group Meeting, Balatonfured, Hungary, September
                 10--13, 2000: proceedings}",
  title =        "{Recent advances in parallel virtual machine and
                 message passing interface: 7th European PVM\slash MPI
                 Users' Group Meeting, Balatonfured, Hungary, September
                 10--13, 2000: proceedings}",
  volume =       "1908",
  publisher =    pub-SV,
  address =      pub-SV:adr,
  pages =        "xv + 364",
  year =         "2000",
  ISBN =         "3-540-41010-4 (softcover)",
  ISBN-13 =      "978-3-540-41010-2 (softcover)",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  bibdate =      "Mon Oct 16 18:31:56 MDT 2000",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib",
  series =       ser-LNCS,
  acknowledgement = ack-nhfb,
  keywords =     "data transmission systems -- congresses; parallel
                 computers -- congresses; virtual computer systems --
                 congresses",
}

@Proceedings{Engquist:2000:SVG,
  editor =       "Bj{\"o}rn Engquist",
  booktitle =    "{Simulation and visualization on the grid:
                 Parallelldatorcentrum, Kungl. Tekniska H{\"o}gskolan,
                 seventh annual conference, Stockholm, Sweden, December
                 1999: proceedings}",
  title =        "{Simulation and visualization on the grid:
                 Parallelldatorcentrum, Kungl.
Tekniska H{\"o}gskolan, seventh annual conference, Stockholm, Sweden, December 1999: proceedings}", volume = "13", publisher = pub-SV, address = pub-SV:adr, pages = "xiii + 300", year = "2000", ISBN = "3-540-67264-8", ISBN-13 = "978-3-540-67264-7", ISSN = "1439-7358", LCCN = "QA76.9.C65 S535 2000", bibdate = "Wed Oct 18 10:32:22 2000", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = ser-LNCSE, acknowledgement = ack-nhfb, keywords = "Computer simulation -- Congresses. Visualization -- Congresses", } @Book{Koniges:2000:ISP, editor = "Alice E. Koniges", booktitle = "{Industrial Strength Parallel Computing}", title = "{Industrial Strength Parallel Computing}", publisher = pub-MORGAN-KAUFMANN, address = pub-MORGAN-KAUFMANN:adr, pages = "xxv + 597", year = "2000", ISBN = "1-55860-540-1", ISBN-13 = "978-1-55860-540-4", LCCN = "QA76.58 .I483 2000", bibdate = "Fri Feb 04 18:30:40 2000", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Proceedings{Reynders:2000:IPI, editor = "John Reynders and Alexander V. 
Veidenbaum", booktitle = "{ICS '00: Proceedings of the 14th international conference on Supercomputing: Santa Fe, New Mexico, USA, May 8--11, 2000}", title = "{ICS '00: Proceedings of the 14th international conference on Supercomputing: Santa Fe, New Mexico, USA, May 8--11, 2000}", publisher = pub-ACM, address = pub-ACM:adr, bookpages = "xi + 509", pages = "xi + 509", year = "2000", DOI = "https://doi.org/10.1145/335231", ISBN = "1-58113-270-0", ISBN-13 = "978-1-58113-270-0", LCCN = "QA76.88 .I573 2000", bibdate = "Fri Jul 27 05:22:06 2001", bibsource = "http://www.acm.org/pubs/contents/proceedings/supercomputing/335231/; https://www.math.utah.edu/pub/tex/bib/fparith.bib; https://www.math.utah.edu/pub/tex/bib/java2000.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "https://dl.acm.org/doi/proceedings/10.1145/335231", acknowledgement = ack-nhfb, keywords = "AS/400; ESA/390; IA-64; Java Virtual Machine (JVM); RS/6000", } @Proceedings{USENIX:2000:PAL, editor = "{USENIX}", booktitle = "{Proceedings of the 4th Annual Linux Showcase and Conference, Atlanta, October 10--14, 2000, Atlanta, Georgia, USA}", title = "{Proceedings of the 4th Annual Linux Showcase and Conference, Atlanta, October 10--14, 2000, Atlanta, Georgia, USA}", publisher = pub-USENIX, address = pub-USENIX:adr, pages = "394", year = "2000", ISBN = "1-880446-17-0", ISBN-13 = "978-1-880446-17-1", LCCN = "????", bibdate = "Wed Oct 16 06:06:36 2002", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", URL = "http://www.usenix.org/publications/library/proceedings/als2000/", acknowledgement = ack-nhfb, } @Proceedings{ACM:2001:SHP, editor = "{ACM}", booktitle = "{SC2001: High Performance Networking and Computing. Denver, CO, November 10--16, 2001}", title = "{SC2001: High Performance Networking and Computing. 
Denver, CO, November 10--16, 2001}", publisher = pub-ACM # " and " # pub-IEEE, address = pub-ACM:adr # " and " # pub-IEEE:adr, pages = "????", year = "2001", ISBN = "1-58113-293-X", ISBN-13 = "978-1-58113-293-9", LCCN = "????", bibdate = "Thu Feb 21 18:29:36 2002", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Proceedings{Cotronis:2001:RAP, editor = "Yiannis Cotronis and J. J. Dongarra", booktitle = "{Recent advances in parallel virtual machine and message passing interface: 8th European PVM\slash MPI Users' Group Meeting, Santorini\slash Thera, Greece, September 23--26, 2001: proceedings}", title = "{Recent advances in parallel virtual machine and message passing interface: 8th European PVM\slash MPI Users' Group Meeting, Santorini\slash Thera, Greece, September 23--26, 2001: proceedings}", volume = "2131", publisher = pub-SV, address = pub-SV:adr, pages = "xv + 438", year = "2001", ISBN = "3-540-42609-4 (paperback)", ISBN-13 = "978-3-540-42609-7 (paperback)", LCCN = "QA76.58 E975 2001; QA267.A1 L43 no.2131", bibdate = "Thu Jan 17 11:49:19 MST 2002", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = ser-LNCS # " and " # ser-LNAI, URL = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm", acknowledgement = ack-nhfb, keywords = "data transmission systems -- congresses; parallel computers -- congresses; virtual computer systems -- congresses", } @Proceedings{Eigenmann:2001:OSM, editor = "Rudolf Eigenmann and Michael J. 
Voss", booktitle = "{OpenMP shared memory parallel programming: International Workshop on OpenMP Applications and Tools, WOMPAT 2001, West Lafayette, IN, USA, July 30--31, 2001: Proceedings}", title = "{OpenMP shared memory parallel programming: International Workshop on OpenMP Applications and Tools, WOMPAT 2001, West Lafayette, IN, USA, July 30--31, 2001: Proceedings}", volume = "2104", publisher = pub-SV, address = pub-SV:adr, pages = "x + 184", year = "2001", CODEN = "LNCSD9", DOI = "????", ISBN = "3-540-42346-X (paperback)", ISBN-13 = "978-3-540-42346-1 (paperback)", ISSN = "0302-9743 (print), 1611-3349 (electronic)", LCCN = "QA76.642 .I589 2001; QA267.A1 L43 no.2104", bibdate = "Thu Jan 17 11:49:19 MST 2002", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs.bib; https://www.math.utah.edu/pub/tex/bib/multithreading.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = ser-LNCS, URL = "http://link.springer-ny.com/link/service/series/0558/tocs/t2104.htm", acknowledgement = ack-nhfb, keywords = "parallel programming (computer science) -- congresses", } @Proceedings{IEEE:2002:STI, editor = "{IEEE}", booktitle = "{SC2002: From Terabytes to Insight. Proceedings of the IEEE ACM SC 2002 Conference, November 16--22, 2002, Baltimore, MD, USA}", title = "{SC2002: From Terabytes to Insight. 
Proceedings of the IEEE ACM SC 2002 Conference, November 16--22, 2002, Baltimore, MD, USA}", publisher = pub-IEEE, address = pub-IEEE:adr, pages = "????", year = "2002", ISBN = "0-7695-1524-X", ISBN-13 = "978-0-7695-1524-3", LCCN = "????", bibdate = "Thu Feb 21 18:29:36 2002", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Proceedings{Kranzlmuller:2002:RAP, editor = "Dieter Kranzlm{\"u}ller", booktitle = "{Recent advances in parallel virtual machine and message passing interface: 9th European PVM\slash MPI Users' Group Meeting, Linz, Austria, September 29--October 2, 2002: proceedings}", title = "{Recent advances in parallel virtual machine and message passing interface: 9th European PVM\slash MPI Users' Group Meeting, Linz, Austria, September 29--October 2, 2002: proceedings}", volume = "2474", publisher = pub-SV, address = pub-SV:adr, pages = "xvi + 462", year = "2002", ISBN = "3-540-44296-0 (softcover)", ISBN-13 = "978-3-540-44296-7 (softcover)", LCCN = "QA76.58 .E975 2002", bibdate = "Sun Dec 1 08:06:09 MST 2002", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "Also available via the World Wide Web", series = ser-LNCS, acknowledgement = ack-nhfb, keywords = "data transmission systems -- congresses; parallel computers -- congresses; virtual computer systems -- congresses", } @Proceedings{Oldehoeft:2002:SIS, editor = "Rod Oldehoeft", booktitle = "{Special issue on software for high-performance systems: papers from the symposium of the Los Alamos Computer Science Institute, held in Santa Fe, NM, USA on October 15--18, 2001}", title = "{Special issue on software for high-performance systems: papers from the symposium of the Los Alamos Computer Science Institute, held in Santa Fe, NM, USA on October 15--18, 2001}", volume = "23(1)", publisher = pub-KLUWER, address = pub-KLUWER:adr, pages = "128", year = "2002", CODEN = "JOSUED", ISSN = "0920-8542 (print), 1573-0484 (electronic)", ISSN-L = "0920-8542",
bibdate = "Wed Jan 14 07:13:03 2004", bibsource = "https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = "The journal of supercomputing", acknowledgement = ack-nhfb, } @Proceedings{ACM:2003:SII, editor = "{ACM}", booktitle = "{SC2003: Igniting Innovation. Phoenix, AZ, November 15--21, 2003}", title = "{SC2003: Igniting Innovation. Phoenix, AZ, November 15--21, 2003}", publisher = pub-ACM # " and " # pub-IEEE, address = pub-ACM:adr # " and " # pub-IEEE:adr, pages = "????", year = "2003", ISBN = "1-58113-695-1", ISBN-13 = "978-1-58113-695-1", LCCN = "????", bibdate = "Thu Feb 21 18:29:36 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Proceedings{Dongarra:2003:RAP, editor = "Jack Dongarra and Domenico Laforenza and Salvatore Orlando", booktitle = "{Recent advances in parallel virtual machine and message passing interface: 10th European PVM\slash MPI User's group Meeting, Venice, Italy, September 29--October 2, 2003: Proceedings}", title = "{Recent advances in parallel virtual machine and message passing interface: 10th European PVM\slash MPI User's group Meeting, Venice, Italy, September 29--October 2, 2003: Proceedings}", volume = "2840", publisher = pub-SV, address = pub-SV:adr, pages = "xviii + 693", year = "2003", CODEN = "LNCSD9", ISBN = "3-540-20149-1", ISBN-13 = "978-3-540-20149-6", ISSN = "0302-9743 (print), 1611-3349 (electronic)", LCCN = "QA76.58 .E973 2003", bibdate = "Tue Jan 13 19:17:43 2004", bibsource = "https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = ser-LNCS, URL = "http://link.springer-ny.com/link/service/series/0558/tocs/t2840.htm", acknowledgement = ack-nhfb, } @Proceedings{Voss:2003:OSM, editor = "Michael J. 
Voss", booktitle = "{OpenMP shared memory parallel programming: International Workshop on OpenMP Applications and Tools, WOMPAT 2003, Toronto, Canada, June 26--27, 2003: Proceedings}", title = "{OpenMP shared memory parallel programming: International Workshop on OpenMP Applications and Tools, WOMPAT 2003, Toronto, Canada, June 26--27, 2003: Proceedings}", volume = "2716", publisher = pub-SV, address = pub-SV:adr, pages = "viii + 270", year = "2003", CODEN = "LNCSD9", DOI = "????", ISBN = "3-540-40435-X (softcover)", ISBN-13 = "978-3-540-40435-4 (softcover)", ISSN = "0302-9743 (print), 1611-3349 (electronic)", LCCN = "QA76.642 .I589 2003", bibdate = "Thu Aug 21 09:09:03 MDT 2003", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = ser-LNCS, URL = "http://link.springer-ny.com/link/service/series/0558/tocs/t2716.htm; http://www.springerlink.com/openurl.asp?genre=issue&issn=0302-9743&volume=2716", acknowledgement = ack-nhfb, keywords = "parallel programming (computer science) -- congresses", } @Proceedings{ACM:2004:SHP, editor = "{ACM}", booktitle = "{SC 2004: High Performance Computing, Networking and Storage: Bridging communities: Proceedings of the IEEE\slash ACM Supercomputing 2004 Conference, Pittsburgh, PA, November 6--12, 2004}", title = "{SC 2004: High Performance Computing, Networking and Storage: Bridging communities: Proceedings of the IEEE\slash ACM Supercomputing 2004 Conference, Pittsburgh, PA, November 6--12, 2004}", publisher = pub-ACM # " and " # pub-IEEE, address = pub-ACM:adr # " and " # pub-IEEE:adr, pages = "????", year = "2004", ISBN = "0-7695-2153-3", ISBN-13 = "978-0-7695-2153-4", LCCN = "????", bibdate = "Tue Dec 27 08:08:01 2005", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Proceedings{Kranzlmuller:2004:RAP, editor = "Dieter Kranzlm{\"u}ller and P{\'e}ter Kacsuk and Jack J.
Dongarra", booktitle = "{Recent Advances in Parallel Virtual Machine and Message Passing Interface: 11th European PVM/MPI Users' Group Meeting, Budapest, Hungary, September 19--22, 2004: proceedings}", title = "{Recent Advances in Parallel Virtual Machine and Message Passing Interface: 11th European PVM/MPI Users' Group Meeting, Budapest, Hungary, September 19--22, 2004: proceedings}", volume = "3241", publisher = pub-SV, address = pub-SV:adr, pages = "xiii + 452", year = "2004", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/b100820", ISBN = "3-540-23163-3", ISBN-13 = "978-3-540-23163-9", ISSN = "0302-9743 (print), 1611-3349 (electronic)", LCCN = "QA76.58 .E973 2004", bibdate = "Sat Jun 4 05:55:05 MDT 2005", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; z3950.loc.gov:7090/Voyager", series = ser-LNCS, URL = "http://www.springerlink.com/openurl.asp?genre=issue&issn=0302-9743&volume=3241; http://www.springerlink.com/openurl.asp?genre=volume&id=doi:10.1007/b100820", acknowledgement = ack-nhfb, meetingname = "European PVM/MPI Users' Group Meeting (11th: 2004: Budapest, Hungary)", subject = "Parallel computers; Congresses; Virtual computer systems; Congresses; Data transmission systems; Congresses", } @Proceedings{ACM:2005:PAI, editor = "{ACM}", booktitle = "{Proceedings of the 2005 ACM\slash IEEE conference on Supercomputing 2005, Seattle, WA, November 12--18 2005}", title = "{Proceedings of the 2005 ACM\slash IEEE conference on Supercomputing 2005, Seattle, WA, November 12--18 2005}", publisher = pub-ACM # " and " # pub-IEEE, address = pub-ACM:adr # " and " # pub-IEEE:adr, pages = "????", year = "2005", ISBN = "1-59593-061-2", ISBN-13 = "978-1-59593-061-3", LCCN = "????", bibdate = "Tue Dec 27 08:08:01 2005", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Proceedings{Beyer:2005:GEC, editor = "Hans-Georg Beyer and others", booktitle = "{Genetic and Evolutionary Computation Conference: GECCO 2005, June 25--29, 
2005 (Saturday-Wednesday) Washington, DC, USA}", title = "{Genetic and Evolutionary Computation Conference: GECCO 2005, June 25--29, 2005 (Saturday-Wednesday) Washington, DC, USA}", publisher = pub-ACM, address = pub-ACM:adr, pages = "????", year = "2005", ISBN = "1-59593-010-8 (paperback)", ISBN-13 = "978-1-59593-010-1 (paperback)", LCCN = "QA76.623 .G44 2005", bibdate = "Tue Mar 6 06:24:38 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/prng.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib; z3950.loc.gov:7090/Voyager", note = "ACM order number 910050.", acknowledgement = ack-nhfb, subject = "Genetic algorithms; Data processing; Congresses; Parallel processing (Electronic computers)", } @Proceedings{Chapman:2005:SMP, editor = "Barbara M. Chapman", booktitle = "{Shared memory parallel programming with OpenMP: 5th International Workshop on OpenMP Applications and Tools, WOMPAT 2004, Houston, TX, USA, May 17--18, 2004: Revised selected papers}", title = "{Shared memory parallel programming with OpenMP: 5th International Workshop on OpenMP Applications and Tools, WOMPAT 2004, Houston, TX, USA, May 17--18, 2004: Revised selected papers}", volume = "3349", publisher = pub-SV, address = pub-SV:adr, pages = "x + 147", year = "2005", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/b105895", ISBN = "3-540-24560-X", ISBN-13 = "978-3-540-24560-5", ISSN = "0302-9743 (print), 1611-3349 (electronic)", LCCN = "QA76 .A1 L42 NO.3349", bibdate = "Thu Jun 2 07:26:02 MDT 2005", bibsource = "clavis.ucalgary.ca:2200/UNICORN; https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = ser-LNCS, URL = "http://www.springerlink.com/openurl.asp?genre=issue&issn=0302-9743&volume=3349; http://www.springerlink.com/openurl.asp?genre=volume&id=doi:10.1007/b105895", acknowledgement = ack-nhfb, meetingname = "International Workshop on OpenMP Applications and Tools (2004: Houston, Tex.)", subject = "Parallel programming (Computer science); Congresses", } 
@Proceedings{DiMartino:2005:RAP, editor = "Beniamino {Di Martino} and Dieter Kranzlm{\"u}ller and J. J. Dongarra", booktitle = "{Recent advances in parallel virtual machine and message passing interface: 12th European PVM/MPI User's Group Meeting, Sorrento, Italy, September 18--21, 2005: proceedings}", title = "{Recent advances in parallel virtual machine and message passing interface: 12th European PVM/MPI User's Group Meeting, Sorrento, Italy, September 18--21, 2005: proceedings}", volume = "3666", publisher = pub-SV, address = pub-SV:adr, pages = "xvii + 546", year = "2005", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/11557265", ISBN = "3-540-29009-5 (paperback)", ISBN-13 = "978-3-540-29009-4 (paperback)", ISSN = "0302-9743 (print), 1611-3349 (electronic)", LCCN = "QA76.58 .E973 2005", bibdate = "Wed Apr 5 19:31:25 MDT 2006", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; z3950.loc.gov:7090/Voyager", series = ser-LNCS, URL = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0302-9743&volume=3666", acknowledgement = ack-nhfb, meetingname = "European PVM/MPI Users' Group Meeting (12th: 2005: Sorrento, Italy)", subject = "Parallel computers; Congresses; Virtual computer systems; Data transmission systems", } @Proceedings{IEEE:2005:IPD, editor = "{IEEE}", booktitle = "{19th International Parallel and Distributed Processing Symposium: proceedings: April 4--8, 2005, Denver, Colorado}", title = "{19th International Parallel and Distributed Processing Symposium: proceedings: April 4--8, 2005, Denver, Colorado}", publisher = pub-IEEE, address = pub-IEEE:adr, pages = "lv + 311", year = "2005", ISBN = "0-7695-2312-9", ISBN-13 = "978-0-7695-2312-5", LCCN = "????", bibdate = "Fri May 27 14:11:22 2005", bibsource = "https://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "IEEE Computer Society Order Number P2312.", acknowledgement = ack-nhfb, } @Proceedings{ACM:2006:PCC, editor 
= "{ACM}", booktitle = "{Proceedings of the 3rd conference on Computing Frontiers, May 3--5, 2006, Ischia, Italy}", title = "{Proceedings of the 3rd conference on Computing Frontiers, May 3--5, 2006, Ischia, Italy}", publisher = pub-ACM, address = pub-ACM:adr, year = "2006", ISBN = "1-59593-302-6", ISBN-13 = "978-1-59593-302-7", LCCN = "", bibdate = "Tue Jun 20 06:45:04 2006", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "ACM order number 104060.", acknowledgement = ack-nhfb, } @Proceedings{ACM:2006:PST, editor = "{ACM}", booktitle = "{Proceedings of the 37th SIGCSE technical symposium on Computer science education 2006, Houston, Texas, USA, March 03--05, 2006}", title = "{Proceedings of the 37th SIGCSE technical symposium on Computer science education 2006, Houston, Texas, USA, March 03--05, 2006}", publisher = pub-ACM, address = pub-ACM:adr, pages = "????", year = "2006", ISBN = "1-59593-259-3", ISBN-13 = "978-1-59593-259-4", LCCN = "", bibdate = "Tue Jun 20 06:53:22 2006", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "ACM order number 457060.", acknowledgement = ack-nhfb, } @Proceedings{Mohr:2006:RAP, editor = "Bernd Mohr and Jesper Larsson Tr{\"a}ff and Joachim Worringen and Jack Dongarra", booktitle = "{Recent Advances in Parallel Virtual Machine and Message Passing Interface: 13th European PVM\slash MPI User's Group Meeting Bonn, Germany, September 17--20, 2006 Proceedings}", title = "{Recent Advances in Parallel Virtual Machine and Message Passing Interface: 13th European PVM\slash MPI User's Group Meeting Bonn, Germany, September 17--20, 2006 Proceedings}", volume = "4192", publisher = pub-SV, address = pub-SV:adr, pages = "104 (est.)", year = "2006", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/11846802", ISBN = "3-540-39110-X (print), 3-540-39112-6 (e-book)", ISBN-13 = "978-3-540-39110-4 (print), 978-3-540-39112-8 (e-book)", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", LCCN = 
"????", bibdate = "Wed Dec 19 15:21:40 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = ser-LNCS, URL = "http://www.springerlink.com/content/978-3-540-39112-8", acknowledgement = ack-nhfb, } @Proceedings{Cappello:2007:RAP, editor = "Franck Cappello and Thomas Herault and Jack Dongarra", booktitle = "{Recent Advances in Parallel Virtual Machine and Message Passing Interface: 14th European PVM\slash MPI User's Group Meeting, Paris, France, September 30 --- October 3, 2007. Proceedings}", title = "{Recent Advances in Parallel Virtual Machine and Message Passing Interface: 14th European PVM\slash MPI User's Group Meeting, Paris, France, September 30 --- October 3, 2007. Proceedings}", volume = "4757", publisher = pub-SV, address = pub-SV:adr, pages = "116 (est.)", year = "2007", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-540-75416-9", ISBN = "3-540-75415-6 (print), 3-540-75416-4 (e-book)", ISBN-13 = "978-3-540-75415-2 (print), 978-3-540-75416-9 (e-book)", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", LCCN = "????", bibdate = "Wed Dec 19 15:25:09 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = ser-LNCS, URL = "http://www.springerlink.com/content/978-3-540-75416-9", acknowledgement = ack-nhfb, } @Proceedings{Simos:2007:CMS, editor = "Theodore E. 
Simos and George Maroulis", booktitle = "{Computation in Modern Science and Engineering: Proceedings of the [Fifth] International Conference on Computational Methods in Science and Engineering 2007 (ICCMSE 2007), Corfu, Greece, 25--30 September 2007}", title = "{Computation in Modern Science and Engineering: Proceedings of the [Fifth] International Conference on Computational Methods in Science and Engineering 2007 (ICCMSE 2007), Corfu, Greece, 25--30 September 2007}", volume = "2A, 2B", publisher = pub-AIP, address = pub-AIP:adr, bookpages = "xxvi + 730 + 10 (vol. 2A)", pages = "xxvi + 730 + 10 (vol. 2A)", year = "2007", ISBN = "0-7354-0476-3 (set), 0-7354-0477-1 (vol. 1), 0-7354-0478-X (vol. 2)", ISBN-13 = "978-0-7354-0476-2 (set), 978-0-7354-0477-9 (vol. 1), 978-0-7354-0478-6 (vol. 2)", ISSN = "0094-243X (print), 1551-7616 (electronic), 1935-0465", LCCN = "Q183.9 .I524 2007", bibdate = "Thu Feb 21 14:15:15 2008", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = "AIP Conference Proceedings (\#963)", URL = "http://www.springer.com/physics/atoms/book/978-0-7354-0478-6", abstract = "All papers have been peer-reviewed. The aim of ICCMSE 2007 is to bring together computational scientists and engineers from several disciplines in order to share methods, methodologies and ideas. 
The potential readers of these proceedings are all the scientists with interest in the following fields: Computational Mathematics, Theoretical Physics, Computational Physics, Theoretical Chemistry, Computational Chemistry, Mathematical Chemistry, Computational Engineering, Computational Mechanics, Computational Biology and Medicine, Scientific Computation, High Performance Computing, Parallel and Distributed Computing, Visualization, Problem Solving Environments, Software Tools, Advanced Numerical Algorithms, Modeling and Simulation of Complex Systems, Web-based Simulation and Computing, Grid-based Simulation and Computing, Computational Grids, and Computer Science.", acknowledgement = ack-nhfb, remark = "Two volumes.", } @Proceedings{Bischof:2008:AAD, editor = "Christian H. Bischof and H. Martin B{\"u}cker and Paul Hovland and Uwe Naumann and Jean Utke", booktitle = "Advances in Automatic Differentiation", title = "Advances in Automatic Differentiation", volume = "64", publisher = pub-SV, address = pub-SV:adr, bookpages = "xviii + 362", pages = "xviii + 362", year = "2008", CODEN = "LNCSA6", DOI = "https://doi.org/10.1007/978-3-540-68942-3", ISBN = "3-540-68935-4 (print), 3-540-68942-7 (e-book)", ISBN-13 = "978-3-540-68935-5 (print), 978-3-540-68942-3 (e-book)", ISSN = "1439-7358", ISSN-L = "1439-7358", LCCN = "QA304 .I58 2008", bibdate = "Thu Dec 20 14:35:07 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncse.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = ser-LNCSE, URL = "http://link.springer.com/book/10.1007/978-3-540-68942-3; http://www.springerlink.com/content/978-3-540-68942-3", acknowledgement = ack-nhfb, remark = "The Fifth International Conference on Automatic Differentiation held from August 11 to 15, 2008 in Bonn, Germany, is the most recent one in a series that began in Breckenridge, USA, in 1991 and continued in Santa Fe, USA, in 1996, Nice, France, in 2000 and Chicago, USA, in 2004.", series-URL = 
"http://link.springer.com/bookseries/3527", } @Proceedings{Chapman:2008:PPM, editor = "Barbara Chapman and Weiming Zheng and Guang R. Gao and Mitsuhisa Sato and Eduard Ayguad{\'e} and Dongsheng Wang", booktitle = "{A Practical Programming Model for the Multi-Core Era: 3rd International Workshop on OpenMP, IWOMP 2007, Beijing, China, June 3--7, 2007 Proceedings}", title = "{A Practical Programming Model for the Multi-Core Era: 3rd International Workshop on OpenMP, IWOMP 2007, Beijing, China, June 3--7, 2007 Proceedings}", volume = "4935", publisher = pub-SV, address = pub-SV:adr, pages = "184 (est.)", year = "2008", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-540-69303-1", ISBN = "3-540-69302-5 (print), 3-540-69303-3 (e-book)", ISBN-13 = "978-3-540-69302-4 (print), 978-3-540-69303-1 (e-book)", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", LCCN = "????", bibdate = "Wed Dec 19 15:20:29 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = ser-LNCS, URL = "http://www.springerlink.com/content/978-3-540-69303-1", acknowledgement = ack-nhfb, } @Proceedings{Eigenmann:2008:ONE, editor = "Rudolf Eigenmann and Bronis R. 
de Supinski", booktitle = "{OpenMP in a New Era of Parallelism: 4th International Workshop, IWOMP 2008 West Lafayette, IN, USA, May 12--14, 2008 Proceedings}", title = "{OpenMP in a New Era of Parallelism: 4th International Workshop, IWOMP 2008 West Lafayette, IN, USA, May 12--14, 2008 Proceedings}", volume = "5004", publisher = pub-SV, address = pub-SV:adr, pages = "190 (est.)", year = "2008", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-540-79561-2", ISBN = "3-540-79560-X (print), 3-540-79561-8 (e-book)", ISBN-13 = "978-3-540-79560-5 (print), 978-3-540-79561-2 (e-book)", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", LCCN = "????", bibdate = "Wed Dec 19 15:21:59 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = ser-LNCS, URL = "http://www.springerlink.com/content/978-3-540-79561-2", acknowledgement = ack-nhfb, } @Proceedings{Lastovetsky:2008:RAP, editor = "Alexey Lastovetsky and Tahar Kechadi and Jack Dongarra", booktitle = "{Recent Advances in Parallel Virtual Machine and Message Passing Interface: 15th European PVM\slash MPI Users' Group Meeting, Dublin, Ireland, September 7--10, 2008. Proceedings}", title = "{Recent Advances in Parallel Virtual Machine and Message Passing Interface: 15th European PVM\slash MPI Users' Group Meeting, Dublin, Ireland, September 7--10, 2008. 
Proceedings}", volume = "5205", publisher = pub-SV, address = pub-SV:adr, pages = "129 (est.)", year = "2008", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-540-87475-1", ISBN = "3-540-87474-7 (print), 3-540-87475-5 (e-book)", ISBN-13 = "978-3-540-87474-4 (print), 978-3-540-87475-1 (e-book)", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", LCCN = "????", bibdate = "Wed Dec 19 15:17:37 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = ser-LNCS, URL = "http://www.springerlink.com/content/978-3-540-87475-1", acknowledgement = ack-nhfb, } @Proceedings{Mueller:2008:OSM, editor = "Matthias S. Mueller and Barbara M. Chapman and Bronis R. de Supinski and Allen D. Malony and Michael Voss", booktitle = "{OpenMP Shared Memory Parallel Programming: International Workshops, IWOMP 2005 and IWOMP 2006, Eugene, OR, USA, June 1--4, 2005, Reims, France, June 12--15, 2006. Proceedings}", title = "{OpenMP Shared Memory Parallel Programming: International Workshops, IWOMP 2005 and IWOMP 2006, Eugene, OR, USA, June 1--4, 2005, Reims, France, June 12--15, 2006. 
Proceedings}", volume = "4315", publisher = pub-SV, address = pub-SV:adr, pages = "252 (est.)", year = "2008", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-540-68555-5", ISBN = "3-540-68554-5 (print), 3-540-68555-3 (e-book)", ISBN-13 = "978-3-540-68554-8 (print), 978-3-540-68555-5 (e-book)", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", LCCN = "????", bibdate = "Wed Dec 19 15:24:26 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = ser-LNCS, URL = "http://www.springerlink.com/content/978-3-540-68555-5", acknowledgement = ack-nhfb, } @Book{Nguyen:2008:GG, editor = "Hubert Nguyen", booktitle = "{GPU} gems 3", title = "{GPU} gems 3", volume = "3", publisher = pub-AW, address = pub-AW:adr, pages = "l + 942", year = "2008", ISBN = "0-321-51526-9", ISBN-13 = "978-0-321-51526-1", LCCN = "T385 .G6882 2008", bibdate = "Thu Jul 29 13:36:54 MDT 2010", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/unix.bib; z3950.loc.gov:7090/Voyager", series = "GPU gems", URL = "http://www.loc.gov/catdir/toc/ecip0720/2007023985.html", acknowledgement = ack-nhfb, keywords = "CUDA; nVIDIA", subject = "Computer graphics; Real-time programming", } @Proceedings{Muller:2009:EOA, editor = "Matthias S. M{\"u}ller and Bronis R. de Supinski and Barbara M. 
Chapman", booktitle = "{Evolving OpenMP in an Age of Extreme Parallelism: 5th International Workshop on OpenMP, IWOMP 2009 Dresden, Germany, June 3--5, 2009 Proceedings}", title = "{Evolving OpenMP in an Age of Extreme Parallelism: 5th International Workshop on OpenMP, IWOMP 2009 Dresden, Germany, June 3--5, 2009 Proceedings}", volume = "5568", publisher = pub-SV, address = pub-SV:adr, pages = "182 (est.)", year = "2009", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-642-02303-3", ISBN = "3-642-02284-7 (print), 3-642-02303-7 (e-book)", ISBN-13 = "978-3-642-02284-5 (print), 978-3-642-02303-3 (e-book)", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", LCCN = "????", bibdate = "Wed Dec 19 15:25:20 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = ser-LNCS, URL = "http://www.springerlink.com/content/978-3-642-02303-3", acknowledgement = ack-nhfb, } @Proceedings{Ropo:2009:RAP, editor = "Matti Ropo and Jan Westerholm and Jack Dongarra", booktitle = "{Recent Advances in Parallel Virtual Machine and Message Passing Interface: 16th European PVM\slash MPI Users' Group Meeting, Espoo, Finland, September 7--10, 2009. Proceedings}", title = "{Recent Advances in Parallel Virtual Machine and Message Passing Interface: 16th European PVM\slash MPI Users' Group Meeting, Espoo, Finland, September 7--10, 2009. 
Proceedings}", volume = "5759", publisher = pub-SV, address = pub-SV:adr, pages = "142 (est.)", year = "2009", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-642-03770-2", ISBN = "3-642-03769-0 (print), 3-642-03770-4 (e-book)", ISBN-13 = "978-3-642-03769-6 (print), 978-3-642-03770-2 (e-book)", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", LCCN = "????", bibdate = "Wed Dec 19 15:20:58 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = ser-LNCS, URL = "http://www.springerlink.com/content/978-3-642-03770-2", acknowledgement = ack-nhfb, } @Proceedings{Tuncer:2009:PCF, editor = "Ismail H. Tuncer and {\"U}lgen G{\"u}lcat and David R. Emerson and Kenichi Matsuno", booktitle = "{Parallel Computational Fluid Dynamics 2007: Implementations and Experiences on Large Scale and Grid Computing}", title = "{Parallel Computational Fluid Dynamics 2007: Implementations and Experiences on Large Scale and Grid Computing}", volume = "67", publisher = pub-SV, address = pub-SV:adr, bookpages = "xi + 480", pages = "xi + 480", year = "2009", CODEN = "LNCSA6", DOI = "https://doi.org/10.1007/978-3-540-92744-0", ISBN = "3-540-92743-3 (print), 3-540-92744-1 (e-book)", ISBN-13 = "978-3-540-92743-3 (print), 978-3-540-92744-0 (e-book)", ISSN = "1439-7358", ISSN-L = "1439-7358", LCCN = "????", bibdate = "Thu Dec 20 14:35:19 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncse.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "Parallel CFD 2007 was held in Antalya, Turkey, from May 21 to 24, 2007.", series = ser-LNCSE, URL = "http://link.springer.com/book/10.1007/978-3-540-92744-0; http://www.springerlink.com/content/978-3-540-92744-0", acknowledgement = ack-nhfb, series-URL = "http://link.springer.com/bookseries/3527", tableofcontents = "Numerical Simulation of a Spinning Projectile Using Parallel and Vectorized Unstructured Flow Solver \\ Development of a Framework for Parallel Simulators with Various Physics
and its Performance \\ Experience in Parallel Computational Mechanics on Marenostrum \\ New Approaches to Modeling Rarefied Gas Flow in the Slip and Transition Regime \\ A Parallel Scientific Software for Heterogeneous Hydrogeology \\ Aerodynamic Shape Optimization Methods on Multiprocessor Platforms \\ Non-Sinusoidal Path Optimization of Dual Airfoils Flapping in a Biplane Configuration \\ Parallel Computation of $3$-D Viscous Flows on Hybrid Grids \\ Implementation of Parallel DSMC Method to Adiabatic Piston Problem \\ Efficient Parallel Algorithm for Multiconstrained Optimization of Wing-Body Configurations \\ Parallel Three Dimensional Direct Simulation Monte Carlo for Simulating Micro Flows \\ A Study on the Prediction of the Aerodynamic Characteristics of an Orbital Block of a Launch Vehicle in the Rarefied Flow Regime Using the DSMC Approach and the Parallel Computation \\ Parallel Solution of a $3$-D Mixed Convection Problem \\ Computation of Hypersonic Flow of a Diatomic Gas in Rotational Non-Equilibrium Past a Blunt Body Using the Generalized Boltzmann Equation \\ Application of Parallel Processing to Numerical Modeling of Two-Phase Deflagration-to-Detonation (DDT) Phenomenon \\ Highly Scalable Multiphysics Computational Framework for Propulsive Energetic Systems \\ A Parallel Aitken-Additive Schwarz Waveform Relaxation Method for Parabolic Problems \\ Parallel Computation of Incompressible Flows Driven by Moving Multiple Obstacles Using a New Moving Embedded-Grid Method \\ Parallel Computing on Network of Windows Based PCs \\ Parallel Computations of Droplet Oscillations \\ Cyclic Distribution of Pipelined Parallel Deferred Correction Method for ODE/DAE \\ Hybrid Parallelization Techniques for Lattice Boltzmann Free Surface Flows \\ Flow-Structure Interaction and Flow Analysis of Hydraulic Machinery on a Computational Grid \\ Parallel Computation of Incompressible Flow Using Building-Cube Method \\ $3$D Model of Pollution Distribution in City Air and its Parallel
Realization \\ Parallel Navier-Stokes Solution of a Wing-Flap Configuration on Structured Multi-Block Oversetting Grids \\ Parallel Navier-Stokes Solutions of NASA 65$^\circ$ Delta-Wing \\ Parallel Turbulent Navier-Stokes Solutions of Wing alone Geometries for Drag Prediction \\ Adaptive Aitken-Schwarz for Darcy $3$D Flow on Heterogeneous Media \\ Numerical Simulation of Compressible Flow using Three-Dimensional Unstructured Added/Eliminated Grid Method \\ Technology of Parallelization for $2$D and $3$D CFD/CAA Codes based on High-Accuracy Explicit Methods on Unstructured Meshes \\ Separate Treatment of Momentum and Heat Flows in Parallel Environment \\ DNS of Turbulent Natural Convection Flows on the Mare Nostrum Supercomputer \\ Termo Fluids: A New Parallel Unstructured CFD Code for the Simulation of Turbulent Industrial Problems on Low Cost PC Cluster", } @Proceedings{Chaudhuri:2010:PIC, editor = "Pranay Chaudhuri and Sukumar Ghosh and Raj Kumar Buyya and Jian-Nong Cao and Deepak Dahiya", booktitle = "{Proceedings of the 2010 1st International Conference on Parallel Distributed and Grid Computing (PDGC), Jaypee University of Information Technology Waknaghat, Solan, HP, India, 28--30 October, 2010}", title = "{Proceedings of the 2010 1st International Conference on Parallel Distributed and Grid Computing (PDGC), Jaypee University of Information Technology Waknaghat, Solan, HP, India, 28--30 October, 2010}", publisher = pub-IEEE, address = pub-IEEE:adr, bookpages = "xiii + 382", pages = "xiii + 382", year = "2010", ISBN = "1-4244-7675-5", ISBN-13 = "978-1-4244-7675-6", LCCN = "????", bibdate = "Thu Apr 21 10:51:00 2011", bibsource = "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", acknowledgement = ack-nhfb, } @Proceedings{Keller:2010:RAM, editor = "Rainer Keller and Edgar Gabriel and Michael Resch and Jack Dongarra", booktitle = "{Recent Advances in the Message Passing Interface: 17th European MPI Users' Group Meeting,
EuroMPI 2010, Stuttgart, Germany, September 12--15, 2010. Proceedings}", title = "{Recent Advances in the Message Passing Interface: 17th European MPI Users' Group Meeting, EuroMPI 2010, Stuttgart, Germany, September 12--15, 2010. Proceedings}", volume = "6305", publisher = pub-SV, address = pub-SV:adr, pages = "197 (est.)", year = "2010", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-642-15646-5", ISBN = "3-642-15645-2 (print), 3-642-15646-0 (e-book)", ISBN-13 = "978-3-642-15645-8 (print), 978-3-642-15646-5 (e-book)", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", LCCN = "????", bibdate = "Wed Dec 19 15:24:14 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = ser-LNCS, URL = "http://www.springerlink.com/content/978-3-642-15646-5", acknowledgement = ack-nhfb, } @Proceedings{Sato:2010:BLL, editor = "Mitsuhisa Sato and Toshihiro Hanawa and Matthias S. M{\"u}ller and Barbara M. Chapman and Bronis R. 
de Supinski", booktitle = "{Beyond Loop Level Parallelism in OpenMP: Accelerators, Tasking and More: 6th International Workshop on OpenMP, IWOMP 2010, Tsukuba, Japan, June 14--16, 2010 Proceedings}", title = "{Beyond Loop Level Parallelism in OpenMP: Accelerators, Tasking and More: 6th International Workshop on OpenMP, IWOMP 2010, Tsukuba, Japan, June 14--16, 2010 Proceedings}", volume = "6132", publisher = pub-SV, address = pub-SV:adr, pages = "173 (est.)", year = "2010", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-642-13217-9", ISBN = "3-642-13216-2 (print), 3-642-13217-0 (e-book)", ISBN-13 = "978-3-642-13216-2 (print), 978-3-642-13217-9 (e-book)", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", LCCN = "????", bibdate = "Wed Dec 19 15:20:26 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = ser-LNCS, URL = "http://www.springerlink.com/content/978-3-642-13217-9", acknowledgement = ack-nhfb, } @Proceedings{ACM:2011:SSP, editor = "{ACM}", booktitle = "{SC '11 State of the Practice Reports}", title = "{SC '11 State of the Practice Reports}", publisher = pub-ACM, address = pub-ACM:adr, year = "2011", ISBN = "1-4503-1139-3", ISBN-13 = "978-1-4503-1139-7", LCCN = "????", bibdate = "Fri Dec 16 11:20:09 2011", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/supercomputing2011.bib", acknowledgement = ack-nhfb, } @Proceedings{Chapman:2011:OPE, editor = "Barbara M. Chapman and William D. Gropp and Kalyan Kumaran and Matthias S. M{\"u}ller", booktitle = "{OpenMP in the Petascale Era: 7th International Workshop on OpenMP, IWOMP 2011, Chicago, IL, USA, June 13--15, 2011. Proceedings}", title = "{OpenMP in the Petascale Era: 7th International Workshop on OpenMP, IWOMP 2011, Chicago, IL, USA, June 13--15, 2011. 
Proceedings}", volume = "6665", publisher = pub-SV, address = pub-SV:adr, pages = "178 (est.)", year = "2011", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-642-21487-5", ISBN = "3-642-21486-X (print), 3-642-21487-8 (e-book)", ISBN-13 = "978-3-642-21486-8 (print), 978-3-642-21487-5 (e-book)", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", LCCN = "????", bibdate = "Wed Dec 19 15:23:23 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = ser-LNCS, URL = "http://www.springerlink.com/content/978-3-642-21487-5", acknowledgement = ack-nhfb, } @Proceedings{Cotronis:2011:RAM, editor = "Yiannis Cotronis and Anthony Danalis and Dimitrios S. Nikolopoulos and Jack Dongarra", booktitle = "{Recent Advances in the Message Passing Interface: 18th European MPI Users' Group Meeting, EuroMPI 2011, Santorini, Greece, September 18--21, 2011. Proceedings}", title = "{Recent Advances in the Message Passing Interface: 18th European MPI Users' Group Meeting, EuroMPI 2011, Santorini, Greece, September 18--21, 2011. 
Proceedings}", volume = "6960", publisher = pub-SV, address = pub-SV:adr, pages = "177 (est.)", year = "2011", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-642-24449-0", ISBN = "3-642-24448-3 (print), 3-642-24449-1 (e-book)", ISBN-13 = "978-3-642-24448-3 (print), 978-3-642-24449-0 (e-book)", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", LCCN = "????", bibdate = "Wed Dec 19 15:21:14 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = ser-LNCS, URL = "http://www.springerlink.com/content/978-3-642-24449-0", acknowledgement = ack-nhfb, } @Proceedings{Lathrop:2011:SPI, editor = "Scott Lathrop and Jim Costa and William Kramer", booktitle = "{SC'11: Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and Analysis, Seattle, WA, November 12--18 2011}", title = "{SC'11: Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and Analysis, Seattle, WA, November 12--18 2011}", publisher = pub-ACM # " and " # pub-IEEE, address = pub-ACM:adr # " and " # pub-IEEE:adr, pages = "????", year = "2011", ISBN = "1-4503-0771-X", ISBN-13 = "978-1-4503-0771-0", LCCN = "????", bibdate = "Fri Dec 16 11:11:35 2011", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/supercomputing2011.bib", acknowledgement = ack-nhfb, xxeditor = "{ACM}", } @Proceedings{Tromeur-Dervout:2011:PCF, editor = "Damien Tromeur-Dervout and Gunther Brenner and David R. 
Emerson and Jocelyne Erhel", booktitle = "{Parallel Computational Fluid Dynamics 2008: Parallel Numerical Methods, Software Development and Applications}", title = "{Parallel Computational Fluid Dynamics 2008: Parallel Numerical Methods, Software Development and Applications}", volume = "74", publisher = pub-SV, address = pub-SV:adr, bookpages = "xi + 432", pages = "xi + 432", year = "2011", CODEN = "LNCSA6", DOI = "https://doi.org/10.1007/978-3-642-14438-7", ISBN = "3-642-14437-3 (print), 3-642-14438-1 (e-book)", ISBN-13 = "978-3-642-14437-0 (print), 978-3-642-14438-7 (e-book)", ISSN = "1439-7358", ISSN-L = "1439-7358", LCCN = "????", bibdate = "Thu Dec 20 14:35:30 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncse.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", note = "Proceedings of the twentieth meeting, Parallel CFD 2008, held May 19--22, 2008 in Lyon, France.", series = ser-LNCSE, URL = "http://link.springer.com/book/10.1007/978-3-642-14438-7; http://www.springerlink.com/content/978-3-642-14438-7", acknowledgement = ack-nhfb, series-URL = "http://link.springer.com/bookseries/3527", tableofcontents = "Preface \\ Contents \\ Part I: Invited speakers \\ Part II: Optimisation in Aerodynamics Design \\ Part III: Grid methods \\ Part IV: Boundary methods \\ Part V: High Order methods \\ Part VI: Parallel Algorithms and Solvers \\ Part VII: Lattice Boltzmann and SPH Methods \\ Part VIII: Software Framework and Component Architecture \\ Part IX: Parallel Performance \\ Part X: Environment and biofluids applications \\ Part XI: General fluid \\ Editorial Policy", } @Proceedings{Chapman:2012:OHW, editor = "Barbara M. Chapman and Federico Massaioli and Matthias S. M{\"u}ller and Marco Rorro", booktitle = "{OpenMP in a Heterogeneous World: 8th International Workshop on OpenMP, IWOMP 2012, Rome, Italy, June 11--13, 2012.
Proceedings}", title = "{OpenMP in a Heterogeneous World: 8th International Workshop on OpenMP, IWOMP 2012, Rome, Italy, June 11--13, 2012. Proceedings}", volume = "7312", publisher = pub-SV, address = pub-SV:adr, pages = "257 (est.)", year = "2012", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-642-30961-8", ISBN = "3-642-30960-7 (print), 3-642-30961-5 (e-book)", ISBN-13 = "978-3-642-30960-1 (print), 978-3-642-30961-8 (e-book)", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", LCCN = "????", bibdate = "Wed Dec 19 15:19:49 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = ser-LNCS, URL = "http://www.springerlink.com/content/978-3-642-30961-8", acknowledgement = ack-nhfb, } @Proceedings{Hollingsworth:2012:SPI, editor = "Jeffrey Hollingsworth", booktitle = "{SC '12: Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis, Salt Lake Convention Center, Salt Lake City, UT, USA, November 10--16, 2012}", title = "{SC '12: Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis, Salt Lake Convention Center, Salt Lake City, UT, USA, November 10--16, 2012}", publisher = pub-IEEE, address = pub-IEEE:adr, year = "2012", ISBN = "1-4673-0804-8", ISBN-13 = "978-1-4673-0804-5", bibdate = "Thu Nov 15 07:35:55 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib; https://www.math.utah.edu/pub/tex/bib/supercomputing2012.bib", acknowledgement = ack-nhfb, } @Proceedings{Traff:2012:RAM, editor = "Jesper Larsson Tr{\"a}ff and Siegfried Benkner and Jack J. Dongarra", booktitle = "{Recent Advances in the Message Passing Interface: 19th European MPI Users' Group Meeting, EuroMPI 2012, Vienna, Austria, September 23--26, 2012. 
Proceedings}", title = "{Recent Advances in the Message Passing Interface: 19th European MPI Users' Group Meeting, EuroMPI 2012, Vienna, Austria, September 23--26, 2012. Proceedings}", volume = "7490", publisher = pub-SV, address = pub-SV:adr, pages = "162 (est.)", year = "2012", CODEN = "LNCSD9", DOI = "https://doi.org/10.1007/978-3-642-33518-1", ISBN = "3-642-33517-9 (print), 3-642-33518-7 (e-book)", ISBN-13 = "978-3-642-33517-4 (print), 978-3-642-33518-1 (e-book)", ISSN = "0302-9743 (print), 1611-3349 (electronic)", ISSN-L = "0302-9743", LCCN = "????", bibdate = "Wed Dec 19 15:23:42 MST 2012", bibsource = "https://www.math.utah.edu/pub/tex/bib/lncs.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = ser-LNCS, URL = "http://www.springerlink.com/content/978-3-642-33518-1", acknowledgement = ack-nhfb, } @Proceedings{Erhel:2014:DDM, editor = "Jocelyne Erhel and Martin J. Gander and Laurence Halpern and G{\'e}raldine Pichot and Taoufik Sassi and Olof Widlund", booktitle = "Domain Decomposition Methods in Science and Engineering {XXI}", title = "Domain Decomposition Methods in Science and Engineering {XXI}", volume = "98", publisher = pub-SV, address = pub-SV:adr, pages = "xx + 973 + 220", year = "2014", DOI = "https://doi.org/10.1007/978-3-319-05789-7", ISBN = "3-319-05788-X (paperback), 3-319-05789-8 (e-book)", ISBN-13 = "978-3-319-05788-0 (paperback), 978-3-319-05789-7 (e-book)", ISSN = "1439-7358 (print), 2197-7100 (electronic)", ISSN-L = "1439-7358", LCCN = "QA71-90", bibdate = "Sat Dec 12 10:43:35 MST 2015", bibsource = "fsz3950.oclc.org:210/WorldCat; https://www.math.utah.edu/pub/tex/bib/lncse.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = ser-LNCSE, URL = "http://link.springer.com/book/10.1007/978-3-319-05789-7", abstract = "This volume contains a selection of papers presented at the 21st international conference on domain decomposition methods in science and engineering held in Rennes, France, June 25--29, 2012.
Domain decomposition is an active and interdisciplinary research discipline, focusing on the development, analysis and implementation of numerical methods for massively parallel computers. Domain decomposition methods are among the most efficient solvers for large scale applications in science and engineering. They are based on a solid theoretical foundation and shown to be scalable for many important applications. Domain decomposition techniques can also naturally take into account multiscale phenomena. This book contains the most recent results in this important field of research, both mathematically and algorithmically and allows the reader to get an overview of this exciting branch of numerical analysis and scientific computing.", acknowledgement = ack-nhfb, tableofcontents = "Preface \\ Part I: Plenary Presentations \\ Part II: Minisymposia \\ Part III: Contributed Presentations", } @Book{Mehl:2015:RTC, editor = "Miriam Mehl and Manfred Bischoff and Michael Sch{\"a}fer", booktitle = "Recent Trends in Computational Engineering --- {CE2014}: Optimization, Uncertainty, Parallel Algorithms, Coupled and Complex Problems", title = "Recent Trends in Computational Engineering --- {CE2014}: Optimization, Uncertainty, Parallel Algorithms, Coupled and Complex Problems", volume = "105", publisher = pub-SV, address = pub-SV:adr, pages = "317 (est.)", year = "2015", ISBN = "3-319-22996-6, 3-319-22997-4 (e-book)", ISBN-13 = "978-3-319-22996-6, 978-3-319-22997-3 (e-book)", LCCN = "QA71-90; TA329", bibdate = "Sat Dec 12 10:43:43 MST 2015", bibsource = "fsz3950.oclc.org:210/WorldCat; https://www.math.utah.edu/pub/tex/bib/lncse.bib; https://www.math.utah.edu/pub/tex/bib/matlab.bib; https://www.math.utah.edu/pub/tex/bib/pvm.bib", series = ser-LNCSE, URL = "http://www.springerlink.com/content/978-3-319-22997-3", acknowledgement = ack-nhfb, meetingname = "International Workshop on Computational Engineering (3rd : 2014 : Stuttgart, Germany)", subject = "Engineering mathematics; 
Congresses; TECHNOLOGY and ENGINEERING / Engineering (General); TECHNOLOGY and ENGINEERING / Reference; Engineering mathematics.", }